summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib')
-rw-r--r--contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp30
-rw-r--r--contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp7
-rw-r--r--contrib/llvm/lib/Analysis/GlobalsModRef.cpp15
-rw-r--r--contrib/llvm/lib/Analysis/InstructionSimplify.cpp199
-rw-r--r--contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp1
-rw-r--r--contrib/llvm/lib/Analysis/LoopInfo.cpp6
-rw-r--r--contrib/llvm/lib/Analysis/LoopPass.cpp27
-rw-r--r--contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/ValueTracking.cpp12
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp23
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp15
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp32
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp33
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp78
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h16
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp23
-rw-r--r--contrib/llvm/lib/CodeGen/BranchFolding.cpp15
-rw-r--r--contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp16
-rw-r--r--contrib/llvm/lib/CodeGen/LiveDebugValues.cpp151
-rw-r--r--contrib/llvm/lib/CodeGen/LiveInterval.cpp10
-rw-r--r--contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/MachineInstr.cpp38
-rw-r--r--contrib/llvm/lib/CodeGen/MachineLICM.cpp87
-rw-r--r--contrib/llvm/lib/CodeGen/MachineVerifier.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterPressure.cpp117
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp28
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp26
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp14
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp8
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp9
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp18
-rw-r--r--contrib/llvm/lib/CodeGen/ShrinkWrap.cpp57
-rw-r--r--contrib/llvm/lib/CodeGen/StackColoring.cpp9
-rw-r--r--contrib/llvm/lib/CodeGen/WinEHPrepare.cpp25
-rw-r--r--contrib/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp40
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp (renamed from contrib/llvm/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp)4
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h3
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcError.cpp57
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h16
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp83
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp46
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h6
-rw-r--r--contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp3
-rw-r--r--contrib/llvm/lib/IR/AsmWriter.cpp2
-rw-r--r--contrib/llvm/lib/IR/Core.cpp2
-rw-r--r--contrib/llvm/lib/IR/Function.cpp44
-rw-r--r--contrib/llvm/lib/IR/IRPrintingPasses.cpp11
-rw-r--r--contrib/llvm/lib/IR/LLVMContext.cpp16
-rw-r--r--contrib/llvm/lib/IR/LLVMContextImpl.h7
-rw-r--r--contrib/llvm/lib/IR/LegacyPassManager.cpp13
-rw-r--r--contrib/llvm/lib/IR/Metadata.cpp2
-rw-r--r--contrib/llvm/lib/IR/Verifier.cpp317
-rw-r--r--contrib/llvm/lib/LTO/LTOCodeGenerator.cpp3
-rw-r--r--contrib/llvm/lib/Linker/IRMover.cpp24
-rw-r--r--contrib/llvm/lib/Linker/LinkModules.cpp157
-rw-r--r--contrib/llvm/lib/MC/MCExpr.cpp1
-rw-r--r--contrib/llvm/lib/MC/MCObjectFileInfo.cpp10
-rw-r--r--contrib/llvm/lib/MC/MCObjectStreamer.cpp2
-rw-r--r--contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp8
-rw-r--r--contrib/llvm/lib/Object/COFFObjectFile.cpp24
-rw-r--r--contrib/llvm/lib/Object/ELF.cpp7
-rw-r--r--contrib/llvm/lib/ProfileData/CoverageMapping.cpp2
-rw-r--r--contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp36
-rw-r--r--contrib/llvm/lib/ProfileData/InstrProf.cpp62
-rw-r--r--contrib/llvm/lib/ProfileData/InstrProfWriter.cpp16
-rw-r--r--contrib/llvm/lib/Support/Debug.cpp8
-rw-r--r--contrib/llvm/lib/Support/IntEqClasses.cpp4
-rw-r--r--contrib/llvm/lib/Support/Triple.cpp56
-rw-r--r--contrib/llvm/lib/Support/Windows/Path.inc1
-rw-r--r--contrib/llvm/lib/Support/Windows/Signals.inc5
-rw-r--r--contrib/llvm/lib/Support/Windows/WindowsSupport.h31
-rw-r--r--contrib/llvm/lib/Support/raw_ostream.cpp2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp57
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPU.h4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp54
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td67
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp300
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h9
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp8
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp1
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIDefines.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp14
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp132
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h7
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp16
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp16
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstructions.td2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h26
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp1968
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.h489
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp96
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h9
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.td15
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp5
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp15
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp138
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h11
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td21
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td7
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb.td8
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td7
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrVFP.td4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp3
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h9
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp28
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp3
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h5
-rw-r--r--contrib/llvm/lib/Target/AVR/AVR.h54
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRSelectionDAGInfo.h29
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp40
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.h35
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp6
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp26
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td4
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRDF.cpp60
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRDF.h28
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp272
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp12
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp30
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp19
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h20
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp180
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFCopy.h48
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp204
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFDeadCode.h65
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp1716
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFGraph.h841
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp848
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFLiveness.h106
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp19
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.h1
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.td4
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp1
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h3
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp100
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td15
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp43
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td15
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp7
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp182
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h3
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td3
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/Disassembler/CMakeLists.txt3
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/Disassembler/LLVMBuild.txt23
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/Disassembler/Makefile16
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp148
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp85
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h3
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp5
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp26
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp7
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp90
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp83
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h42
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp94
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h68
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp63
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp153
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp14
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td25
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp9
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td33
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td246
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp35
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h3
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp8
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp18
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp5
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt19
-rw-r--r--contrib/llvm/lib/Target/X86/X86.h7
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallingConv.td6
-rw-r--r--contrib/llvm/lib/Target/X86/X86FastISel.cpp3
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.cpp298
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.h19
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrAVX512.td114
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrExtension.td36
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td3
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.td160
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrMPX.td28
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSSE.td11
-rw-r--r--contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h148
-rw-r--r--contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h7
-rw-r--r--contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp195
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp12
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp166
-rw-r--r--contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp24
-rw-r--r--contrib/llvm/lib/Transforms/IPO/IPO.cpp5
-rw-r--r--contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp10
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp6
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp6
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp17
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp34
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp218
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LICM.cpp46
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp488
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp35
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp3
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp102
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SCCP.cpp61
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp9
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp16
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp72
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Local.cpp31
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp101
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp165
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp13
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp44
237 files changed, 12588 insertions, 2476 deletions
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 85404d8..c3d2803 100644
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -586,8 +586,13 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min);
}
-ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
- unsigned ArgIdx) {
+/// Returns true if this is a writeonly (i.e Mod only) parameter. Currently,
+/// we don't have a writeonly attribute, so this only knows about builtin
+/// intrinsics and target library functions. We could consider adding a
+/// writeonly attribute in the future and moving all of these facts to either
+/// Intrinsics.td or InferFunctionAttr.cpp
+static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx,
+ const TargetLibraryInfo &TLI) {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()))
switch (II->getIntrinsicID()) {
default:
@@ -597,9 +602,9 @@ ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
case Intrinsic::memmove:
// We don't currently have a writeonly attribute. All other properties
// of these intrinsics are nicely described via attributes in
- // Intrinsics.td and handled generically below.
+ // Intrinsics.td and handled generically.
if (ArgIdx == 0)
- return MRI_Mod;
+ return true;
}
// We can bound the aliasing properties of memset_pattern16 just as we can
@@ -609,7 +614,22 @@ ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
// handled via InferFunctionAttr.
if (CS.getCalledFunction() && isMemsetPattern16(CS.getCalledFunction(), TLI))
if (ArgIdx == 0)
- return MRI_Mod;
+ return true;
+
+ // TODO: memset_pattern4, memset_pattern8
+ // TODO: _chk variants
+ // TODO: strcmp, strcpy
+
+ return false;
+}
+
+ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
+ unsigned ArgIdx) {
+
+ // Emulate the missing writeonly attribute by checking for known builtin
+ // intrinsics and target library functions.
+ if (isWriteOnlyParam(CS, ArgIdx, TLI))
+ return MRI_Mod;
if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadOnly))
return MRI_Ref;
diff --git a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
index 07b389a..6dd1d0a 100644
--- a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
+++ b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
@@ -612,9 +612,10 @@ namespace {
bool runOnSCC(CallGraphSCC &SCC) override {
Out << Banner;
for (CallGraphNode *CGN : SCC) {
- if (CGN->getFunction())
- CGN->getFunction()->print(Out);
- else
+ if (CGN->getFunction()) {
+ if (isFunctionInPrintList(CGN->getFunction()->getName()))
+ CGN->getFunction()->print(Out);
+ } else
Out << "\nPrinting <null> Function\n";
}
return false;
diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
index 249f395..1babb82 100644
--- a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
+++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -358,21 +358,6 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
if (CS.isArgOperand(&U) && isFreeCall(I, &TLI)) {
if (Writers)
Writers->insert(CS->getParent()->getParent());
- } else if (CS.doesNotCapture(CS.getDataOperandNo(&U))) {
- Function *ParentF = CS->getParent()->getParent();
- // A nocapture argument may be read from or written to, but does not
- // escape unless the call can somehow recurse.
- //
- // nocapture "indicates that the callee does not make any copies of
- // the pointer that outlive itself". Therefore if we directly or
- // indirectly recurse, we must treat the pointer as escaping.
- if (FunctionToSCCMap[ParentF] ==
- FunctionToSCCMap[CS.getCalledFunction()])
- return true;
- if (Readers)
- Readers->insert(ParentF);
- if (Writers)
- Writers->insert(ParentF);
} else {
return true; // Argument of an unknown call.
}
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
index b89ff26..6dfe625 100644
--- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -70,7 +70,7 @@ static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned);
static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned);
static Value *SimplifyTruncInst(Value *, Type *, const Query &, unsigned);
-/// getFalse - For a boolean type, or a vector of boolean type, return false, or
+/// For a boolean type, or a vector of boolean type, return false, or
/// a vector with every element false, as appropriate for the type.
static Constant *getFalse(Type *Ty) {
assert(Ty->getScalarType()->isIntegerTy(1) &&
@@ -78,7 +78,7 @@ static Constant *getFalse(Type *Ty) {
return Constant::getNullValue(Ty);
}
-/// getTrue - For a boolean type, or a vector of boolean type, return true, or
+/// For a boolean type, or a vector of boolean type, return true, or
/// a vector with every element true, as appropriate for the type.
static Constant *getTrue(Type *Ty) {
assert(Ty->getScalarType()->isIntegerTy(1) &&
@@ -100,7 +100,7 @@ static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS,
CRHS == LHS;
}
-/// ValueDominatesPHI - Does the given value dominate the specified phi node?
+/// Does the given value dominate the specified phi node?
static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I)
@@ -131,8 +131,8 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
return false;
}
-/// ExpandBinOp - Simplify "A op (B op' C)" by distributing op over op', turning
-/// it into "(A op B) op' (A op C)". Here "op" is given by Opcode and "op'" is
+/// Simplify "A op (B op' C)" by distributing op over op', turning it into
+/// "(A op B) op' (A op C)". Here "op" is given by Opcode and "op'" is
/// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS.
/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)".
/// Returns the simplified value, or null if no simplification was performed.
@@ -193,8 +193,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return nullptr;
}
-/// SimplifyAssociativeBinOp - Generic simplifications for associative binary
-/// operations. Returns the simpler value, or null if none was found.
+/// Generic simplifications for associative binary operations.
+/// Returns the simpler value, or null if none was found.
static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
const Query &Q, unsigned MaxRecurse) {
Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc;
@@ -290,10 +290,10 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
return nullptr;
}
-/// ThreadBinOpOverSelect - In the case of a binary operation with a select
-/// instruction as an operand, try to simplify the binop by seeing whether
-/// evaluating it on both branches of the select results in the same value.
-/// Returns the common value if so, otherwise returns null.
+/// In the case of a binary operation with a select instruction as an operand,
+/// try to simplify the binop by seeing whether evaluating it on both branches
+/// of the select results in the same value. Returns the common value if so,
+/// otherwise returns null.
static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
const Query &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
@@ -362,10 +362,9 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
return nullptr;
}
-/// ThreadCmpOverSelect - In the case of a comparison with a select instruction,
-/// try to simplify the comparison by seeing whether both branches of the select
-/// result in the same value. Returns the common value if so, otherwise returns
-/// null.
+/// In the case of a comparison with a select instruction, try to simplify the
+/// comparison by seeing whether both branches of the select result in the same
+/// value. Returns the common value if so, otherwise returns null.
static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const Query &Q,
unsigned MaxRecurse) {
@@ -444,10 +443,10 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
return nullptr;
}
-/// ThreadBinOpOverPHI - In the case of a binary operation with an operand that
-/// is a PHI instruction, try to simplify the binop by seeing whether evaluating
-/// it on the incoming phi values yields the same result for every value. If so
-/// returns the common value, otherwise returns null.
+/// In the case of a binary operation with an operand that is a PHI instruction,
+/// try to simplify the binop by seeing whether evaluating it on the incoming
+/// phi values yields the same result for every value. If so returns the common
+/// value, otherwise returns null.
static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
const Query &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
@@ -486,10 +485,10 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
return CommonValue;
}
-/// ThreadCmpOverPHI - In the case of a comparison with a PHI instruction, try
-/// try to simplify the comparison by seeing whether comparing with all of the
-/// incoming phi values yields the same result every time. If so returns the
-/// common result, otherwise returns null.
+/// In the case of a comparison with a PHI instruction, try to simplify the
+/// comparison by seeing whether comparing with all of the incoming phi values
+/// yields the same result every time. If so returns the common result,
+/// otherwise returns null.
static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
const Query &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
@@ -524,8 +523,8 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
return CommonValue;
}
-/// SimplifyAddInst - Given operands for an Add, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an Add, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const Query &Q, unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
@@ -656,8 +655,8 @@ static Constant *computePointerDifference(const DataLayout &DL, Value *LHS,
return ConstantExpr::getSub(LHSOffset, RHSOffset);
}
-/// SimplifySubInst - Given operands for a Sub, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for a Sub, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const Query &Q, unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0))
@@ -889,8 +888,8 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1,
return nullptr;
}
-/// SimplifyMulInst - Given operands for a Mul, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for a Mul, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
@@ -989,8 +988,8 @@ Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout &DL,
RecursionLimit);
}
-/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an SDiv or UDiv, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
const Query &Q, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
@@ -1075,8 +1074,8 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
return nullptr;
}
-/// SimplifySDivInst - Given operands for an SDiv, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an SDiv, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse))
@@ -1093,8 +1092,8 @@ Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout &DL,
RecursionLimit);
}
-/// SimplifyUDivInst - Given operands for a UDiv, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for a UDiv, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse))
@@ -1154,8 +1153,8 @@ Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
RecursionLimit);
}
-/// SimplifyRem - Given operands for an SRem or URem, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an SRem or URem, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
const Query &Q, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
@@ -1215,8 +1214,8 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
return nullptr;
}
-/// SimplifySRemInst - Given operands for an SRem, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an SRem, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse))
@@ -1233,8 +1232,8 @@ Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout &DL,
RecursionLimit);
}
-/// SimplifyURemInst - Given operands for a URem, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for a URem, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse))
@@ -1279,7 +1278,7 @@ Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
RecursionLimit);
}
-/// isUndefShift - Returns true if a shift by \c Amount always yields undef.
+/// Returns true if a shift by \c Amount always yields undef.
static bool isUndefShift(Value *Amount) {
Constant *C = dyn_cast<Constant>(Amount);
if (!C)
@@ -1306,8 +1305,8 @@ static bool isUndefShift(Value *Amount) {
return false;
}
-/// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an Shl, LShr or AShr, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
const Query &Q, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
@@ -1375,8 +1374,8 @@ static Value *SimplifyRightShift(unsigned Opcode, Value *Op0, Value *Op1,
return nullptr;
}
-/// SimplifyShlInst - Given operands for an Shl, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an Shl, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const Query &Q, unsigned MaxRecurse) {
if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse))
@@ -1402,8 +1401,8 @@ Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
RecursionLimit);
}
-/// SimplifyLShrInst - Given operands for an LShr, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an LShr, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
const Query &Q, unsigned MaxRecurse) {
if (Value *V = SimplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q,
@@ -1427,8 +1426,8 @@ Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
RecursionLimit);
}
-/// SimplifyAShrInst - Given operands for an AShr, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an AShr, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
const Query &Q, unsigned MaxRecurse) {
if (Value *V = SimplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q,
@@ -1502,8 +1501,8 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
return nullptr;
}
-// Simplify (and (icmp ...) (icmp ...)) to true when we can tell that the range
-// of possible values cannot be satisfied.
+/// Simplify (and (icmp ...) (icmp ...)) to true when we can tell that the range
+/// of possible values cannot be satisfied.
static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
ICmpInst::Predicate Pred0, Pred1;
ConstantInt *CI1, *CI2;
@@ -1554,8 +1553,8 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
return nullptr;
}
-/// SimplifyAndInst - Given operands for an And, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an And, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
@@ -1661,8 +1660,8 @@ Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL,
RecursionLimit);
}
-// Simplify (or (icmp ...) (icmp ...)) to true when we can tell that the union
-// contains all possible values.
+/// Simplify (or (icmp ...) (icmp ...)) to true when we can tell that the union
+/// contains all possible values.
static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
ICmpInst::Predicate Pred0, Pred1;
ConstantInt *CI1, *CI2;
@@ -1713,8 +1712,8 @@ static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
return nullptr;
}
-/// SimplifyOrInst - Given operands for an Or, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an Or, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
@@ -1849,8 +1848,8 @@ Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout &DL,
RecursionLimit);
}
-/// SimplifyXorInst - Given operands for a Xor, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for a Xor, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
@@ -1910,9 +1909,9 @@ static Type *GetCompareTy(Value *Op) {
return CmpInst::makeCmpResultType(Op->getType());
}
-/// ExtractEquivalentCondition - Rummage around inside V looking for something
-/// equivalent to the comparison "LHS Pred RHS". Return such a value if found,
-/// otherwise return null. Helper function for analyzing max/min idioms.
+/// Rummage around inside V looking for something equivalent to the comparison
+/// "LHS Pred RHS". Return such a value if found, otherwise return null.
+/// Helper function for analyzing max/min idioms.
static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
Value *LHS, Value *RHS) {
SelectInst *SI = dyn_cast<SelectInst>(V);
@@ -2100,21 +2099,17 @@ static Constant *computePointerICmp(const DataLayout &DL,
// that might be resolve lazily to symbols in another dynamically-loaded
// library (and, thus, could be malloc'ed by the implementation).
auto IsAllocDisjoint = [](SmallVectorImpl<Value *> &Objects) {
- return std::all_of(Objects.begin(), Objects.end(),
- [](Value *V){
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
- return AI->getParent() && AI->getParent()->getParent() &&
- AI->isStaticAlloca();
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
- return (GV->hasLocalLinkage() ||
- GV->hasHiddenVisibility() ||
- GV->hasProtectedVisibility() ||
- GV->hasUnnamedAddr()) &&
- !GV->isThreadLocal();
- if (const Argument *A = dyn_cast<Argument>(V))
- return A->hasByValAttr();
- return false;
- });
+ return std::all_of(Objects.begin(), Objects.end(), [](Value *V) {
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
+ return AI->getParent() && AI->getFunction() && AI->isStaticAlloca();
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return (GV->hasLocalLinkage() || GV->hasHiddenVisibility() ||
+ GV->hasProtectedVisibility() || GV->hasUnnamedAddr()) &&
+ !GV->isThreadLocal();
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+ return false;
+ });
};
if ((IsNAC(LHSUObjs) && IsAllocDisjoint(RHSUObjs)) ||
@@ -2127,8 +2122,8 @@ static Constant *computePointerICmp(const DataLayout &DL,
return nullptr;
}
-/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an ICmpInst, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const Query &Q, unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
@@ -3102,8 +3097,8 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
RecursionLimit);
}
-/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an FCmpInst, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
FastMathFlags FMF, const Query &Q,
unsigned MaxRecurse) {
@@ -3227,8 +3222,7 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
}
-/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
-/// replaced with RepOp.
+/// See if V simplifies when its operand Op is replaced with RepOp.
static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
const Query &Q,
unsigned MaxRecurse) {
@@ -3311,8 +3305,8 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
return nullptr;
}
-/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
-/// the result. If not, this returns null.
+/// Given operands for a SelectInst, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
Value *FalseVal, const Query &Q,
unsigned MaxRecurse) {
@@ -3449,8 +3443,8 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
}
-/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an GetElementPtrInst, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
const Query &Q, unsigned) {
// The type of the GEP pointer operand.
@@ -3542,8 +3536,8 @@ Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout &DL,
Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
}
-/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
-/// can fold the result. If not, this returns null.
+/// Given operands for an InsertValueInst, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
ArrayRef<unsigned> Idxs, const Query &Q,
unsigned) {
@@ -3579,8 +3573,8 @@ Value *llvm::SimplifyInsertValueInst(
RecursionLimit);
}
-/// SimplifyExtractValueInst - Given operands for an ExtractValueInst, see if we
-/// can fold the result. If not, this returns null.
+/// Given operands for an ExtractValueInst, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
const Query &, unsigned) {
if (auto *CAgg = dyn_cast<Constant>(Agg))
@@ -3614,8 +3608,8 @@ Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
RecursionLimit);
}
-/// SimplifyExtractElementInst - Given operands for an ExtractElementInst, see if we
-/// can fold the result. If not, this returns null.
+/// Given operands for an ExtractElementInst, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &,
unsigned) {
if (auto *CVec = dyn_cast<Constant>(Vec)) {
@@ -3646,7 +3640,7 @@ Value *llvm::SimplifyExtractElementInst(
RecursionLimit);
}
-/// SimplifyPHINode - See if we can fold the given phi. If not, returns null.
+/// See if we can fold the given phi. If not, returns null.
static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
// If all of the PHI's incoming values are the same then replace the PHI node
// with the common value.
@@ -3696,8 +3690,8 @@ Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout &DL,
//=== Helper functions for higher up the class hierarchy.
-/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for a BinaryOperator, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
const Query &Q, unsigned MaxRecurse) {
switch (Opcode) {
@@ -3763,8 +3757,8 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
}
}
-/// SimplifyFPBinOp - Given operands for a BinaryOperator, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for a BinaryOperator, see if we can fold the result.
+/// If not, this returns null.
/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the
/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp.
static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
@@ -3799,8 +3793,7 @@ Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
RecursionLimit);
}
-/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
-/// fold the result.
+/// Given operands for a CmpInst, see if we can fold the result.
static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const Query &Q, unsigned MaxRecurse) {
if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
@@ -3938,8 +3931,8 @@ Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
}
-/// SimplifyInstruction - See if we can compute a simplified version of this
-/// instruction. If not, this returns null.
+/// See if we can compute a simplified version of this instruction.
+/// If not, this returns null.
Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC) {
diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index d7896ad..8bcdcb8 100644
--- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -845,6 +845,7 @@ int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr,
if (Lp != AR->getLoop()) {
DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " <<
*Ptr << " SCEV: " << *PtrScev << "\n");
+ return 0;
}
// The address calculation must not wrap. Otherwise, a dependence could be
diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp
index 9ab9eea..0c725fc 100644
--- a/contrib/llvm/lib/Analysis/LoopInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp
@@ -637,8 +637,10 @@ LoopInfo::LoopInfo(const DominatorTreeBase<BasicBlock> &DomTree) {
analyze(DomTree);
}
-void LoopInfo::updateUnloop(Loop *Unloop) {
- Unloop->markUnlooped();
+void LoopInfo::markAsRemoved(Loop *Unloop) {
+ assert(!Unloop->isInvalid() && "Loop has already been removed");
+ Unloop->invalidate();
+ RemovedLoops.push_back(Unloop);
// First handle the special case of no parent loop to simplify the algorithm.
if (!Unloop->getParentLoop()) {
diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp
index dc42473..8163231 100644
--- a/contrib/llvm/lib/Analysis/LoopPass.cpp
+++ b/contrib/llvm/lib/Analysis/LoopPass.cpp
@@ -42,7 +42,11 @@ public:
}
bool runOnLoop(Loop *L, LPPassManager &) override {
- P.run(*L);
+ auto BBI = find_if(L->blocks().begin(), L->blocks().end(),
+ [](BasicBlock *BB) { return BB; });
+ if (BBI != L->blocks().end() &&
+ isFunctionInPrintList((*BBI)->getParent()->getName()))
+ P.run(*L);
return false;
}
};
@@ -174,8 +178,9 @@ bool LPPassManager::runOnFunction(Function &F) {
// Walk Loops
while (!LQ.empty()) {
-
+ bool LoopWasDeleted = false;
CurrentLoop = LQ.back();
+
// Run all passes on the current Loop.
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
LoopPass *P = getContainedPass(Index);
@@ -192,15 +197,15 @@ bool LPPassManager::runOnFunction(Function &F) {
Changed |= P->runOnLoop(CurrentLoop, *this);
}
+ LoopWasDeleted = CurrentLoop->isInvalid();
if (Changed)
dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
- CurrentLoop->isUnloop()
- ? "<deleted>"
- : CurrentLoop->getHeader()->getName());
+ LoopWasDeleted ? "<deleted>"
+ : CurrentLoop->getHeader()->getName());
dumpPreservedSet(P);
- if (CurrentLoop->isUnloop()) {
+ if (LoopWasDeleted) {
// Notify passes that the loop is being deleted.
deleteSimpleAnalysisLoop(CurrentLoop);
} else {
@@ -222,12 +227,11 @@ bool LPPassManager::runOnFunction(Function &F) {
removeNotPreservedAnalysis(P);
recordAvailableAnalysis(P);
- removeDeadPasses(P, CurrentLoop->isUnloop()
- ? "<deleted>"
- : CurrentLoop->getHeader()->getName(),
+ removeDeadPasses(P, LoopWasDeleted ? "<deleted>"
+ : CurrentLoop->getHeader()->getName(),
ON_LOOP_MSG);
- if (CurrentLoop->isUnloop())
+ if (LoopWasDeleted)
// Do not run other passes on this loop.
break;
}
@@ -235,12 +239,11 @@ bool LPPassManager::runOnFunction(Function &F) {
// If the loop was deleted, release all the loop passes. This frees up
// some memory, and avoids trouble with the pass manager trying to call
// verifyAnalysis on them.
- if (CurrentLoop->isUnloop()) {
+ if (LoopWasDeleted) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
Pass *P = getContainedPass(Index);
freePass(P, "<deleted>", ON_LOOP_MSG);
}
- delete CurrentLoop;
}
// Pop the loop from queue after running all passes.
diff --git a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
index 029997a..486f3a5 100644
--- a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -26,7 +26,7 @@
// ... = load %ptr2, !alias.scope !{ !scope1, !scope2 }, !noalias !{ !scope1 }
//
// When evaluating an aliasing query, if one of the instructions is associated
-// has a set of noalias scopes in some domain that is superset of the alias
+// has a set of noalias scopes in some domain that is a superset of the alias
// scopes in that domain of some other instruction, then the two memory
// accesses are assumed not to alias.
//
diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 805f3ef..9f92391 100644
--- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -70,7 +70,7 @@
// A a;
// } B;
//
-// For an acess to B.a.s, we attach !5 (a path tag node) to the load/store
+// For an access to B.a.s, we attach !5 (a path tag node) to the load/store
// instruction. The base type is !4 (struct B), the access type is !2 (scalar
// type short) and the offset is 4.
//
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
index abc57ed..a83e207 100644
--- a/contrib/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -2556,6 +2556,9 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) {
switch (I->getOpcode()) {
default: break;
+ // Unsigned integers are always nonnegative.
+ case Instruction::UIToFP:
+ return true;
case Instruction::FMul:
// x*x is always non-negative or a NaN.
if (I->getOperand(0) == I->getOperand(1))
@@ -2566,6 +2569,9 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) {
case Instruction::FRem:
return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) &&
CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1);
+ case Instruction::Select:
+ return CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1) &&
+ CannotBeOrderedLessThanZero(I->getOperand(2), Depth+1);
case Instruction::FPExt:
case Instruction::FPTrunc:
// Widening/narrowing never change sign.
@@ -2574,6 +2580,12 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
switch (II->getIntrinsicID()) {
default: break;
+ case Intrinsic::maxnum:
+ return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) ||
+ CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1);
+ case Intrinsic::minnum:
+ return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) &&
+ CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1);
case Intrinsic::exp:
case Intrinsic::exp2:
case Intrinsic::fabs:
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index c7606fd..2ad4b32 100644
--- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2654,8 +2654,6 @@ std::error_code BitcodeReader::parseConstants() {
return error("Invalid record");
Type *EltTy = cast<SequentialType>(CurTy)->getElementType();
- unsigned Size = Record.size();
-
if (EltTy->isIntegerTy(8)) {
SmallVector<uint8_t, 16> Elts(Record.begin(), Record.end());
if (isa<VectorType>(CurTy))
@@ -2680,21 +2678,24 @@ std::error_code BitcodeReader::parseConstants() {
V = ConstantDataVector::get(Context, Elts);
else
V = ConstantDataArray::get(Context, Elts);
+ } else if (EltTy->isHalfTy()) {
+ SmallVector<uint16_t, 16> Elts(Record.begin(), Record.end());
+ if (isa<VectorType>(CurTy))
+ V = ConstantDataVector::getFP(Context, Elts);
+ else
+ V = ConstantDataArray::getFP(Context, Elts);
} else if (EltTy->isFloatTy()) {
- SmallVector<float, 16> Elts(Size);
- std::transform(Record.begin(), Record.end(), Elts.begin(), BitsToFloat);
+ SmallVector<uint32_t, 16> Elts(Record.begin(), Record.end());
if (isa<VectorType>(CurTy))
- V = ConstantDataVector::get(Context, Elts);
+ V = ConstantDataVector::getFP(Context, Elts);
else
- V = ConstantDataArray::get(Context, Elts);
+ V = ConstantDataArray::getFP(Context, Elts);
} else if (EltTy->isDoubleTy()) {
- SmallVector<double, 16> Elts(Size);
- std::transform(Record.begin(), Record.end(), Elts.begin(),
- BitsToDouble);
+ SmallVector<uint64_t, 16> Elts(Record.begin(), Record.end());
if (isa<VectorType>(CurTy))
- V = ConstantDataVector::get(Context, Elts);
+ V = ConstantDataVector::getFP(Context, Elts);
else
- V = ConstantDataArray::get(Context, Elts);
+ V = ConstantDataArray::getFP(Context, Elts);
} else {
return error("Invalid type for value");
}
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index a1f8786..a899a0c 100644
--- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1630,19 +1630,10 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
if (isa<IntegerType>(EltTy)) {
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i)
Record.push_back(CDS->getElementAsInteger(i));
- } else if (EltTy->isFloatTy()) {
- for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
- union { float F; uint32_t I; };
- F = CDS->getElementAsFloat(i);
- Record.push_back(I);
- }
} else {
- assert(EltTy->isDoubleTy() && "Unknown ConstantData element type");
- for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
- union { double F; uint64_t I; };
- F = CDS->getElementAsDouble(i);
- Record.push_back(I);
- }
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i)
+ Record.push_back(
+ CDS->getElementAsAPFloat(i).bitcastToAPInt().getLimitedValue());
}
} else if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
isa<ConstantVector>(C)) {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index be7eafb..5f67d3d 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -192,22 +192,26 @@ bool AsmPrinter::doInitialization(Module &M) {
// use the directive, where it would need the same conditionalization
// anyway.
Triple TT(getTargetTriple());
- if (TT.isOSDarwin()) {
+ // If there is a version specified, Major will be non-zero.
+ if (TT.isOSDarwin() && TT.getOSMajorVersion() != 0) {
unsigned Major, Minor, Update;
- TT.getOSVersion(Major, Minor, Update);
- // If there is a version specified, Major will be non-zero.
- if (Major) {
- MCVersionMinType VersionType;
- if (TT.isWatchOS())
- VersionType = MCVM_WatchOSVersionMin;
- else if (TT.isTvOS())
- VersionType = MCVM_TvOSVersionMin;
- else if (TT.isMacOSX())
- VersionType = MCVM_OSXVersionMin;
- else
- VersionType = MCVM_IOSVersionMin;
- OutStreamer->EmitVersionMin(VersionType, Major, Minor, Update);
+ MCVersionMinType VersionType;
+ if (TT.isWatchOS()) {
+ VersionType = MCVM_WatchOSVersionMin;
+ TT.getWatchOSVersion(Major, Minor, Update);
+ } else if (TT.isTvOS()) {
+ VersionType = MCVM_TvOSVersionMin;
+ TT.getiOSVersion(Major, Minor, Update);
+ } else if (TT.isMacOSX()) {
+ VersionType = MCVM_OSXVersionMin;
+ if (!TT.getMacOSXVersion(Major, Minor, Update))
+ Major = 0;
+ } else {
+ VersionType = MCVM_IOSVersionMin;
+ TT.getiOSVersion(Major, Minor, Update);
}
+ if (Major != 0)
+ OutStreamer->EmitVersionMin(VersionType, Major, Minor, Update);
}
// Allow the target to emit any magic that it wants at the start of the file.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index bf794f7..7b0cdbd 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -32,6 +32,39 @@
using namespace llvm;
//===----------------------------------------------------------------------===//
+// EmittingAsmStreamer Implementation
+//===----------------------------------------------------------------------===//
+unsigned EmittingAsmStreamer::emitULEB128(uint64_t Value, const char *Desc,
+ unsigned PadTo) {
+ AP->EmitULEB128(Value, Desc, PadTo);
+ return 0;
+}
+
+unsigned EmittingAsmStreamer::emitInt8(unsigned char Value) {
+ AP->EmitInt8(Value);
+ return 0;
+}
+
+unsigned EmittingAsmStreamer::emitBytes(StringRef Data) {
+ AP->OutStreamer->EmitBytes(Data);
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// SizeReporterAsmStreamer Implementation
+//===----------------------------------------------------------------------===//
+unsigned SizeReporterAsmStreamer::emitULEB128(uint64_t Value, const char *Desc,
+ unsigned PadTo) {
+ return getULEB128Size(Value);
+}
+
+unsigned SizeReporterAsmStreamer::emitInt8(unsigned char Value) { return 1; }
+
+unsigned SizeReporterAsmStreamer::emitBytes(StringRef Data) {
+ return Data.size();
+}
+
+//===----------------------------------------------------------------------===//
// DIEAbbrevData Implementation
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 1f0c06f..a4fb07e 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -561,6 +561,8 @@ void DwarfDebug::finalizeModuleInfo() {
// Collect info for variables that were optimized out.
collectDeadVariables();
+ unsigned MacroOffset = 0;
+ std::unique_ptr<AsmStreamerBase> AS(new SizeReporterAsmStreamer(Asm));
// Handle anything that needs to be done on a per-unit basis after
// all other generation.
for (const auto &P : CUMap) {
@@ -613,6 +615,15 @@ void DwarfDebug::finalizeModuleInfo() {
U.setBaseAddress(TheCU.getRanges().front().getStart());
U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
}
+
+ auto *CUNode = cast<DICompileUnit>(P.first);
+ if (CUNode->getMacros()) {
+ // Compile Unit has macros, emit "DW_AT_macro_info" attribute.
+ U.addUInt(U.getUnitDie(), dwarf::DW_AT_macro_info,
+ dwarf::DW_FORM_sec_offset, MacroOffset);
+ // Update macro section offset
+ MacroOffset += handleMacroNodes(AS.get(), CUNode->getMacros(), U);
+ }
}
// Compute DIE offsets and sizes.
@@ -656,6 +667,9 @@ void DwarfDebug::endModule() {
// Emit info into a debug ranges section.
emitDebugRanges();
+ // Emit info into a debug macinfo section.
+ emitDebugMacinfo();
+
if (useSplitDwarf()) {
emitDebugStrDWO();
emitDebugInfoDWO();
@@ -1833,6 +1847,70 @@ void DwarfDebug::emitDebugRanges() {
}
}
+unsigned DwarfDebug::handleMacroNodes(AsmStreamerBase *AS,
+ DIMacroNodeArray Nodes,
+ DwarfCompileUnit &U) {
+ unsigned Size = 0;
+ for (auto *MN : Nodes) {
+ if (auto *M = dyn_cast<DIMacro>(MN))
+ Size += emitMacro(AS, *M);
+ else if (auto *F = dyn_cast<DIMacroFile>(MN))
+ Size += emitMacroFile(AS, *F, U);
+ else
+ llvm_unreachable("Unexpected DI type!");
+ }
+ return Size;
+}
+
+unsigned DwarfDebug::emitMacro(AsmStreamerBase *AS, DIMacro &M) {
+ int Size = 0;
+ Size += AS->emitULEB128(M.getMacinfoType());
+ Size += AS->emitULEB128(M.getLine());
+ StringRef Name = M.getName();
+ StringRef Value = M.getValue();
+ Size += AS->emitBytes(Name);
+ if (!Value.empty()) {
+ // There should be one space between macro name and macro value.
+ Size += AS->emitInt8(' ');
+ Size += AS->emitBytes(Value);
+ }
+ Size += AS->emitInt8('\0');
+ return Size;
+}
+
+unsigned DwarfDebug::emitMacroFile(AsmStreamerBase *AS, DIMacroFile &F,
+ DwarfCompileUnit &U) {
+ int Size = 0;
+ assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file);
+ Size += AS->emitULEB128(dwarf::DW_MACINFO_start_file);
+ Size += AS->emitULEB128(F.getLine());
+ DIFile *File = F.getFile();
+ unsigned FID =
+ U.getOrCreateSourceID(File->getFilename(), File->getDirectory());
+ Size += AS->emitULEB128(FID);
+ Size += handleMacroNodes(AS, F.getElements(), U);
+ Size += AS->emitULEB128(dwarf::DW_MACINFO_end_file);
+ return Size;
+}
+
+// Emit visible names into a debug macinfo section.
+void DwarfDebug::emitDebugMacinfo() {
+ if (MCSection *Macinfo = Asm->getObjFileLowering().getDwarfMacinfoSection()) {
+ // Start the dwarf macinfo section.
+ Asm->OutStreamer->SwitchSection(Macinfo);
+ }
+ std::unique_ptr<AsmStreamerBase> AS(new EmittingAsmStreamer(Asm));
+ for (const auto &P : CUMap) {
+ auto &TheCU = *P.second;
+ auto *SkCU = TheCU.getSkeleton();
+ DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
+ auto *CUNode = cast<DICompileUnit>(P.first);
+ handleMacroNodes(AS.get(), CUNode->getMacros(), U);
+ }
+ Asm->OutStreamer->AddComment("End Of Macro List Mark");
+ Asm->EmitInt8(0);
+}
+
// DWARF5 Experimental Separate Dwarf emitters.
void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 4c613a9..460c186 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -400,18 +400,26 @@ class DwarfDebug : public AsmPrinterHandler {
/// Emit visible names into a debug str section.
void emitDebugStr();
- /// Emit visible names into a debug loc section.
+ /// Emit variable locations into a debug loc section.
void emitDebugLoc();
- /// Emit visible names into a debug loc dwo section.
+ /// Emit variable locations into a debug loc dwo section.
void emitDebugLocDWO();
- /// Emit visible names into a debug aranges section.
+ /// Emit address ranges into a debug aranges section.
void emitDebugARanges();
- /// Emit visible names into a debug ranges section.
+ /// Emit address ranges into a debug ranges section.
void emitDebugRanges();
+ /// Emit macros into a debug macinfo section.
+ void emitDebugMacinfo();
+ unsigned emitMacro(AsmStreamerBase *AS, DIMacro &M);
+ unsigned emitMacroFile(AsmStreamerBase *AS, DIMacroFile &F,
+ DwarfCompileUnit &U);
+ unsigned handleMacroNodes(AsmStreamerBase *AS, DIMacroNodeArray Nodes,
+ DwarfCompileUnit &U);
+
/// DWARF 5 Experimental Split Dwarf Emitters
/// Initialize common features of skeleton units.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
index c2c0f84..1e2f55b 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -82,13 +82,24 @@ void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
const MDNode *Scope = DL.getScope();
if (!Scope)
return;
+ unsigned LineNumber = DL.getLine();
+ // Skip this line if it is longer than the maximum we can record.
+ if (LineNumber > COFF::CVL_MaxLineNumber)
+ return;
+
+ unsigned ColumnNumber = DL.getCol();
+ // Truncate the column number if it is longer than the maximum we can record.
+ if (ColumnNumber > COFF::CVL_MaxColumnNumber)
+ ColumnNumber = 0;
+
StringRef Filename = getFullFilepath(Scope);
// Skip this instruction if it has the same file:line as the previous one.
assert(CurFn);
if (!CurFn->Instrs.empty()) {
const InstrInfoTy &LastInstr = InstrInfo[CurFn->Instrs.back()];
- if (LastInstr.Filename == Filename && LastInstr.LineNumber == DL.getLine())
+ if (LastInstr.Filename == Filename && LastInstr.LineNumber == LineNumber &&
+ LastInstr.ColumnNumber == ColumnNumber)
return;
}
FileNameRegistry.add(Filename);
@@ -96,7 +107,7 @@ void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
MCSymbol *MCL = Asm->MMI->getContext().createTempSymbol();
Asm->OutStreamer->EmitLabel(MCL);
CurFn->Instrs.push_back(MCL);
- InstrInfo[MCL] = InstrInfoTy(Filename, DL.getLine(), DL.getCol());
+ InstrInfo[MCL] = InstrInfoTy(Filename, LineNumber, ColumnNumber);
}
WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP)
@@ -282,8 +293,9 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
ColSegEnd = ColSegI + FilenameSegmentLengths[LastSegmentStart];
ColSegI != ColSegEnd; ++ColSegI) {
unsigned ColumnNumber = InstrInfo[FI.Instrs[ColSegI]].ColumnNumber;
+ assert(ColumnNumber <= COFF::CVL_MaxColumnNumber);
Asm->EmitInt16(ColumnNumber); // Start column
- Asm->EmitInt16(ColumnNumber); // End column
+ Asm->EmitInt16(0); // End column
}
Asm->OutStreamer->EmitLabel(FileSegmentEnd);
};
@@ -320,7 +332,10 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
// The first PC with the given linenumber and the linenumber itself.
EmitLabelDiff(*Asm->OutStreamer, Fn, Instr);
- Asm->EmitInt32(InstrInfo[Instr].LineNumber);
+ uint32_t LineNumber = InstrInfo[Instr].LineNumber;
+ assert(LineNumber <= COFF::CVL_MaxLineNumber);
+ uint32_t LineData = LineNumber | COFF::CVL_IsStatement;
+ Asm->EmitInt32(LineData);
}
FinishPreviousChunk();
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
index 604feed..df5cac5 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -744,18 +744,6 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
return true;
}
-static bool hasIdenticalMMOs(const MachineInstr *MI1, const MachineInstr *MI2) {
- auto I1 = MI1->memoperands_begin(), E1 = MI1->memoperands_end();
- auto I2 = MI2->memoperands_begin(), E2 = MI2->memoperands_end();
- if ((E1 - I1) != (E2 - I2))
- return false;
- for (; I1 != E1; ++I1, ++I2) {
- if (**I1 != **I2)
- return false;
- }
- return true;
-}
-
static void
removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
MachineBasicBlock &MBBCommon) {
@@ -792,8 +780,7 @@ removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
assert(MBBICommon->isIdenticalTo(&*MBBI) && "Expected matching MIIs!");
if (MBBICommon->mayLoad() || MBBICommon->mayStore())
- if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon))
- MBBICommon->dropMemRefs();
+ MBBICommon->setMemRefs(MBBICommon->mergeMemRefsWith(*MBBI));
++MBBI;
++MBBICommon;
diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 6fbdea8..03e5778 100644
--- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1108,7 +1108,7 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
// <16 x i1> %mask, <16 x i32> %passthru)
// to a chain of basic blocks, with loading element one-by-one if
// the appropriate mask bit is set
-//
+//
// %1 = bitcast i8* %addr to i32*
// %2 = extractelement <16 x i1> %mask, i32 0
// %3 = icmp eq i1 %2, true
@@ -1272,12 +1272,12 @@ static void ScalarizeMaskedLoad(CallInst *CI) {
// %5 = getelementptr i32* %1, i32 0
// store i32 %4, i32* %5
// br label %else
-//
+//
// else: ; preds = %0, %cond.store
// %6 = extractelement <16 x i1> %mask, i32 1
// %7 = icmp eq i1 %6, true
// br i1 %7, label %cond.store1, label %else2
-//
+//
// cond.store1: ; preds = %else
// %8 = extractelement <16 x i32> %val, i32 1
// %9 = getelementptr i32* %1, i32 1
@@ -1377,24 +1377,24 @@ static void ScalarizeMaskedStore(CallInst *CI) {
// <16 x i1> %Mask, <16 x i32> %Src)
// to a chain of basic blocks, with loading element one-by-one if
// the appropriate mask bit is set
-//
+//
// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
// % Mask0 = extractelement <16 x i1> %Mask, i32 0
// % ToLoad0 = icmp eq i1 % Mask0, true
// br i1 % ToLoad0, label %cond.load, label %else
-//
+//
// cond.load:
// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
// % Load0 = load i32, i32* % Ptr0, align 4
// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0
// br label %else
-//
+//
// else:
// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0]
// % Mask1 = extractelement <16 x i1> %Mask, i32 1
// % ToLoad1 = icmp eq i1 % Mask1, true
// br i1 % ToLoad1, label %cond.load1, label %else2
-//
+//
// cond.load1:
// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
// % Load1 = load i32, i32* % Ptr1, align 4
@@ -1526,7 +1526,7 @@ static void ScalarizeMaskedGather(CallInst *CI) {
// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
// store i32 %Elt0, i32* % Ptr0, align 4
// br label %else
-//
+//
// else:
// % Mask1 = extractelement <16 x i1> % Mask, i32 1
// % ToStore1 = icmp eq i1 % Mask1, true
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
index 98d30b9..b9937e5 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -19,6 +19,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -30,7 +32,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include <deque>
+#include <queue>
#include <list>
using namespace llvm;
@@ -76,16 +78,13 @@ private:
typedef std::list<VarLoc> VarLocList;
typedef SmallDenseMap<const MachineBasicBlock *, VarLocList> VarLocInMBB;
- bool OLChanged; // OutgoingLocs got changed for this bb.
- bool MBBJoined; // The MBB was joined.
-
void transferDebugValue(MachineInstr &MI, VarLocList &OpenRanges);
void transferRegisterDef(MachineInstr &MI, VarLocList &OpenRanges);
- void transferTerminatorInst(MachineInstr &MI, VarLocList &OpenRanges,
+ bool transferTerminatorInst(MachineInstr &MI, VarLocList &OpenRanges,
VarLocInMBB &OutLocs);
- void transfer(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs);
+ bool transfer(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs);
- void join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs);
+ bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs);
bool ExtendRanges(MachineFunction &MF);
@@ -225,24 +224,18 @@ void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
}
/// Terminate all open ranges at the end of the current basic block.
-void LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
+bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
VarLocList &OpenRanges,
VarLocInMBB &OutLocs) {
+ bool Changed = false;
const MachineBasicBlock *CurMBB = MI.getParent();
if (!(MI.isTerminator() || (&MI == &CurMBB->instr_back())))
- return;
+ return false;
if (OpenRanges.empty())
- return;
+ return false;
- if (OutLocs.find(CurMBB) == OutLocs.end()) {
- // Create space for new Outgoing locs entries.
- VarLocList VLL;
- OutLocs.insert(std::make_pair(CurMBB, std::move(VLL)));
- }
- auto OL = OutLocs.find(CurMBB);
- assert(OL != OutLocs.end());
- VarLocList &VLL = OL->second;
+ VarLocList &VLL = OutLocs[CurMBB];
for (auto OR : OpenRanges) {
// Copy OpenRanges to OutLocs, if not already present.
@@ -251,28 +244,30 @@ void LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
if (std::find_if(VLL.begin(), VLL.end(),
[&](const VarLoc &V) { return (OR == V); }) == VLL.end()) {
VLL.push_back(std::move(OR));
- OLChanged = true;
+ Changed = true;
}
}
OpenRanges.clear();
+ return Changed;
}
/// This routine creates OpenRanges and OutLocs.
-void LiveDebugValues::transfer(MachineInstr &MI, VarLocList &OpenRanges,
+bool LiveDebugValues::transfer(MachineInstr &MI, VarLocList &OpenRanges,
VarLocInMBB &OutLocs) {
+ bool Changed = false;
transferDebugValue(MI, OpenRanges);
transferRegisterDef(MI, OpenRanges);
- transferTerminatorInst(MI, OpenRanges, OutLocs);
+ Changed = transferTerminatorInst(MI, OpenRanges, OutLocs);
+ return Changed;
}
/// This routine joins the analysis results of all incoming edges in @MBB by
/// inserting a new DBG_VALUE instruction at the start of the @MBB - if the same
/// source variable in all the predecessors of @MBB reside in the same location.
-void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
+bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
VarLocInMBB &InLocs) {
DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n");
-
- MBBJoined = false;
+ bool Changed = false;
VarLocList InLocsT; // Temporary incoming locations.
@@ -282,7 +277,7 @@ void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
auto OL = OutLocs.find(p);
// Join is null in case of empty OutLocs from any of the pred.
if (OL == OutLocs.end())
- return;
+ return false;
// Just copy over the Out locs to incoming locs for the first predecessor.
if (p == *MBB.pred_begin()) {
@@ -292,27 +287,18 @@ void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
// Join with this predecessor.
VarLocList &VLL = OL->second;
- InLocsT.erase(std::remove_if(InLocsT.begin(), InLocsT.end(),
- [&](VarLoc &ILT) {
- return (std::find_if(VLL.begin(), VLL.end(),
- [&](const VarLoc &V) {
- return (ILT == V);
- }) == VLL.end());
- }),
- InLocsT.end());
+ InLocsT.erase(
+ std::remove_if(InLocsT.begin(), InLocsT.end(), [&](VarLoc &ILT) {
+ return (std::find_if(VLL.begin(), VLL.end(), [&](const VarLoc &V) {
+ return (ILT == V);
+ }) == VLL.end());
+ }), InLocsT.end());
}
if (InLocsT.empty())
- return;
+ return false;
- if (InLocs.find(&MBB) == InLocs.end()) {
- // Create space for new Incoming locs entries.
- VarLocList VLL;
- InLocs.insert(std::make_pair(&MBB, std::move(VLL)));
- }
- auto IL = InLocs.find(&MBB);
- assert(IL != InLocs.end());
- VarLocList &ILL = IL->second;
+ VarLocList &ILL = InLocs[&MBB];
// Insert DBG_VALUE instructions, if not already inserted.
for (auto ILT : InLocsT) {
@@ -331,12 +317,13 @@ void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
MI->getOperand(1).setImm(DMI->getOperand(1).getImm());
DEBUG(dbgs() << "Inserted: "; MI->dump(););
++NumInserted;
- MBBJoined = true; // rerun transfer().
+ Changed = true;
VarLoc V(ILT.Var, MI);
ILL.push_back(std::move(V));
}
}
+ return Changed;
}
/// Calculate the liveness information for the given machine function and
@@ -346,48 +333,72 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
DEBUG(dbgs() << "\nDebug Range Extension\n");
bool Changed = false;
- OLChanged = MBBJoined = false;
+ bool OLChanged = false;
+ bool MBBJoined = false;
VarLocList OpenRanges; // Ranges that are open until end of bb.
VarLocInMBB OutLocs; // Ranges that exist beyond bb.
VarLocInMBB InLocs; // Ranges that are incoming after joining.
- std::deque<MachineBasicBlock *> BBWorklist;
-
+ DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
+ DenseMap<MachineBasicBlock *, unsigned int> BBToOrder;
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>> Worklist;
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>> Pending;
// Initialize every mbb with OutLocs.
for (auto &MBB : MF)
for (auto &MI : MBB)
transfer(MI, OpenRanges, OutLocs);
DEBUG(printVarLocInMBB(OutLocs, "OutLocs after initialization", dbgs()));
- // Construct a worklist of MBBs.
- for (auto &MBB : MF)
- BBWorklist.push_back(&MBB);
-
- // Perform join() and transfer() using the worklist until the ranges converge
- // Ranges have converged when the worklist is empty.
- while (!BBWorklist.empty()) {
- MachineBasicBlock *MBB = BBWorklist.front();
- BBWorklist.pop_front();
-
- join(*MBB, OutLocs, InLocs);
+ ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
+ unsigned int RPONumber = 0;
+ for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) {
+ OrderToBB[RPONumber] = *RI;
+ BBToOrder[*RI] = RPONumber;
+ Worklist.push(RPONumber);
+ ++RPONumber;
+ }
- if (MBBJoined) {
- Changed = true;
- for (auto &MI : *MBB)
- transfer(MI, OpenRanges, OutLocs);
- DEBUG(printVarLocInMBB(OutLocs, "OutLocs after propagating", dbgs()));
- DEBUG(printVarLocInMBB(InLocs, "InLocs after propagating", dbgs()));
-
- if (OLChanged) {
- OLChanged = false;
- for (auto s : MBB->successors())
- if (std::find(BBWorklist.begin(), BBWorklist.end(), s) ==
- BBWorklist.end()) // add if not already present.
- BBWorklist.push_back(s);
+ // This is a standard "union of predecessor outs" dataflow problem.
+ // To solve it, we perform join() and transfer() using the two worklist method
+ // until the ranges converge.
+ // Ranges have converged when both worklists are empty.
+ while (!Worklist.empty() || !Pending.empty()) {
+ // We track what is on the pending worklist to avoid inserting the same
+ // thing twice. We could avoid this with a custom priority queue, but this
+ // is probably not worth it.
+ SmallPtrSet<MachineBasicBlock *, 16> OnPending;
+ while (!Worklist.empty()) {
+ MachineBasicBlock *MBB = OrderToBB[Worklist.top()];
+ Worklist.pop();
+ MBBJoined = join(*MBB, OutLocs, InLocs);
+
+ if (MBBJoined) {
+ MBBJoined = false;
+ Changed = true;
+ for (auto &MI : *MBB)
+ OLChanged |= transfer(MI, OpenRanges, OutLocs);
+ DEBUG(printVarLocInMBB(OutLocs, "OutLocs after propagating", dbgs()));
+ DEBUG(printVarLocInMBB(InLocs, "InLocs after propagating", dbgs()));
+
+ if (OLChanged) {
+ OLChanged = false;
+ for (auto s : MBB->successors())
+ if (!OnPending.count(s)) {
+ OnPending.insert(s);
+ Pending.push(BBToOrder[s]);
+ }
+ }
}
}
+ Worklist.swap(Pending);
+ // At this point, pending must be empty, since it was just the empty
+ // worklist
+ assert(Pending.empty() && "Pending should be empty");
}
+
DEBUG(printVarLocInMBB(OutLocs, "Final OutLocs", dbgs()));
DEBUG(printVarLocInMBB(InLocs, "Final InLocs", dbgs()));
return Changed;
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
index efad36f..bb34883 100644
--- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -1328,15 +1328,15 @@ void LiveRangeUpdater::flush() {
LR->verify();
}
-unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
+unsigned ConnectedVNInfoEqClasses::Classify(const LiveRange &LR) {
// Create initial equivalence classes.
EqClass.clear();
- EqClass.grow(LI->getNumValNums());
+ EqClass.grow(LR.getNumValNums());
const VNInfo *used = nullptr, *unused = nullptr;
// Determine connections.
- for (const VNInfo *VNI : LI->valnos) {
+ for (const VNInfo *VNI : LR.valnos) {
// Group all unused values into one class.
if (VNI->isUnused()) {
if (unused)
@@ -1351,14 +1351,14 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
// Connect to values live out of predecessors.
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI)
- if (const VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI)))
+ if (const VNInfo *PVNI = LR.getVNInfoBefore(LIS.getMBBEndIdx(*PI)))
EqClass.join(VNI->id, PVNI->id);
} else {
// Normal value defined by an instruction. Check for two-addr redef.
// FIXME: This could be coincidental. Should we really check for a tied
// operand constraint?
// Note that VNI->def may be a use slot for an early clobber def.
- if (const VNInfo *UVNI = LI->getVNInfoBefore(VNI->def))
+ if (const VNInfo *UVNI = LR.getVNInfoBefore(VNI->def))
EqClass.join(VNI->id, UVNI->id);
}
}
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
index 9451d92..a506e05 100644
--- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -1446,7 +1446,7 @@ void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) {
void LiveIntervals::splitSeparateComponents(LiveInterval &LI,
SmallVectorImpl<LiveInterval*> &SplitLIs) {
ConnectedVNInfoEqClasses ConEQ(*this);
- unsigned NumComp = ConEQ.Classify(&LI);
+ unsigned NumComp = ConEQ.Classify(LI);
if (NumComp <= 1)
return;
DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n');
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 76099f2..85d544d 100644
--- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -1182,7 +1182,7 @@ MachineBasicBlock::getProbabilityIterator(MachineBasicBlock::succ_iterator I) {
/// Return whether (physical) register "Reg" has been <def>ined and not <kill>ed
/// as of just before "MI".
-///
+///
/// Search is localised to a neighborhood of
/// Neighborhood instructions before (searching for defs or kills) and N
/// instructions after (searching just for defs) MI.
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 790f5ac..4f424ff 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -31,7 +31,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
const std::string Banner;
MachineFunctionPrinterPass() : MachineFunctionPass(ID), OS(dbgs()) { }
- MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner)
+ MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner)
: MachineFunctionPass(ID), OS(os), Banner(banner) {}
const char *getPassName() const override { return "MachineFunction Printer"; }
@@ -42,6 +42,8 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
}
bool runOnMachineFunction(MachineFunction &MF) override {
+ if (!llvm::isFunctionInPrintList(MF.getName()))
+ return false;
OS << "# " << Banner << ":\n";
MF.print(OS, getAnalysisIfAvailable<SlotIndexes>());
return false;
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
index 6b8eecc..6dca74d 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -866,14 +866,44 @@ void MachineInstr::addMemOperand(MachineFunction &MF,
setMemRefs(NewMemRefs, NewMemRefs + NewNum);
}
+/// Check to see if the MMOs pointed to by the two MemRefs arrays are
+/// identical.
+static bool hasIdenticalMMOs(const MachineInstr &MI1, const MachineInstr &MI2) {
+ auto I1 = MI1.memoperands_begin(), E1 = MI1.memoperands_end();
+ auto I2 = MI2.memoperands_begin(), E2 = MI2.memoperands_end();
+ if ((E1 - I1) != (E2 - I2))
+ return false;
+ for (; I1 != E1; ++I1, ++I2) {
+ if (**I1 != **I2)
+ return false;
+ }
+ return true;
+}
+
std::pair<MachineInstr::mmo_iterator, unsigned>
MachineInstr::mergeMemRefsWith(const MachineInstr& Other) {
- // TODO: If we end up with too many memory operands, return the empty
- // conservative set rather than failing asserts.
+
+ // If either of the incoming memrefs are empty, we must be conservative and
+ // treat this as if we've exhausted our space for memrefs and dropped them.
+ if (memoperands_empty() || Other.memoperands_empty())
+ return std::make_pair(nullptr, 0);
+
+ // If both instructions have identical memrefs, we don't need to merge them.
+ // Since many instructions have a single memref, and we tend to merge things
+ // like pairs of loads from the same location, this catches a large number of
+ // cases in practice.
+ if (hasIdenticalMMOs(*this, Other))
+ return std::make_pair(MemRefs, NumMemRefs);
+
// TODO: consider uniquing elements within the operand lists to reduce
// space usage and fall back to conservative information less often.
- size_t CombinedNumMemRefs = (memoperands_end() - memoperands_begin())
- + (Other.memoperands_end() - Other.memoperands_begin());
+ size_t CombinedNumMemRefs = NumMemRefs + Other.NumMemRefs;
+
+ // If we don't have enough room to store this many memrefs, be conservative
+ // and drop them. Otherwise, we'd fail asserts when trying to add them to
+ // the new instruction.
+ if (CombinedNumMemRefs != uint8_t(CombinedNumMemRefs))
+ return std::make_pair(nullptr, 0);
MachineFunction *MF = getParent()->getParent();
mmo_iterator MemBegin = MF->allocateMemRefsArray(CombinedNumMemRefs);
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
index a8368e9..99a97d2 100644
--- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -334,12 +334,11 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
// writes to all slots.
if (MI->memoperands_empty())
return true;
- for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
- oe = MI->memoperands_end(); o != oe; ++o) {
- if (!(*o)->isStore() || !(*o)->getPseudoValue())
+ for (const MachineMemOperand *MemOp : MI->memoperands()) {
+ if (!MemOp->isStore() || !MemOp->getPseudoValue())
continue;
if (const FixedStackPseudoSourceValue *Value =
- dyn_cast<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) {
+ dyn_cast<FixedStackPseudoSourceValue>(MemOp->getPseudoValue())) {
if (Value->getFrameIndex() == FI)
return true;
}
@@ -357,8 +356,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
bool RuledOut = false;
bool HasNonInvariantUse = false;
unsigned Def = 0;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (MO.isFI()) {
// Remember if the instruction stores to the frame index.
int FI = MO.getIndex();
@@ -452,9 +450,7 @@ void MachineLICM::HoistRegionPostRA() {
// Walk the entire region, count number of defs for each register, and
// collect potential LICM candidates.
const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
- for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
- MachineBasicBlock *BB = Blocks[i];
-
+ for (MachineBasicBlock *BB : Blocks) {
// If the header of the loop containing this basic block is a landing pad,
// then don't try to hoist instructions out of this loop.
const MachineLoop *ML = MLI->getLoopFor(BB);
@@ -469,19 +465,15 @@ void MachineLICM::HoistRegionPostRA() {
}
SpeculationState = SpeculateUnknown;
- for (MachineBasicBlock::iterator
- MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
- MachineInstr *MI = &*MII;
- ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
- }
+ for (MachineInstr &MI : *BB)
+ ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
}
// Gather the registers read / clobbered by the terminator.
BitVector TermRegs(NumRegs);
MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
if (TI != Preheader->end()) {
- for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = TI->getOperand(i);
+ for (const MachineOperand &MO : TI->operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
@@ -500,17 +492,16 @@ void MachineLICM::HoistRegionPostRA() {
// 3. Make sure candidate def should not clobber
// registers read by the terminator. Similarly its def should not be
// clobbered by the terminator.
- for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
- if (Candidates[i].FI != INT_MIN &&
- StoredFIs.count(Candidates[i].FI))
+ for (CandidateInfo &Candidate : Candidates) {
+ if (Candidate.FI != INT_MIN &&
+ StoredFIs.count(Candidate.FI))
continue;
- unsigned Def = Candidates[i].Def;
+ unsigned Def = Candidate.Def;
if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
bool Safe = true;
- MachineInstr *MI = Candidates[i].MI;
- for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
- const MachineOperand &MO = MI->getOperand(j);
+ MachineInstr *MI = Candidate.MI;
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || MO.isDef() || !MO.getReg())
continue;
unsigned Reg = MO.getReg();
@@ -523,7 +514,7 @@ void MachineLICM::HoistRegionPostRA() {
}
}
if (Safe)
- HoistPostRA(MI, Candidates[i].Def);
+ HoistPostRA(MI, Candidate.Def);
}
}
}
@@ -532,15 +523,11 @@ void MachineLICM::HoistRegionPostRA() {
/// sure it is not killed by any instructions in the loop.
void MachineLICM::AddToLiveIns(unsigned Reg) {
const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
- for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
- MachineBasicBlock *BB = Blocks[i];
+ for (MachineBasicBlock *BB : Blocks) {
if (!BB->isLiveIn(Reg))
BB->addLiveIn(Reg);
- for (MachineBasicBlock::iterator
- MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
- MachineInstr *MI = &*MII;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (MachineInstr &MI : *BB) {
+ for (MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
MO.setIsKill(false);
@@ -582,8 +569,8 @@ bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
// Check loop exiting blocks.
SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
- for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i)
- if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) {
+ for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
+ if (!DT->dominates(BB, CurrentLoopExitingBlock)) {
SpeculationState = SpeculateTrue;
return false;
}
@@ -689,8 +676,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
InitRegPressure(Preheader);
// Now perform LICM.
- for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
- MachineDomTreeNode *Node = Scopes[i];
+ for (MachineDomTreeNode *Node : Scopes) {
MachineBasicBlock *MBB = Node->getBlock();
EnterScope(MBB);
@@ -858,13 +844,11 @@ static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
if (MI.memoperands_empty())
return true;
- for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
- E = MI.memoperands_end(); I != E; ++I) {
- if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) {
+ for (MachineMemOperand *MemOp : MI.memoperands())
+ if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
if (PSV->isGOT() || PSV->isConstantPool())
return true;
- }
- }
+
return false;
}
@@ -899,9 +883,7 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
return false;
// The instruction is loop invariant if all of its operands are.
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = I.getOperand(i);
-
+ for (const MachineOperand &MO : I.operands()) {
if (!MO.isReg())
continue;
@@ -1230,11 +1212,8 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
/// preheader that may become duplicates of instructions that are hoisted
/// out of the loop.
void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
- for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
- const MachineInstr *MI = &*I;
- unsigned Opcode = MI->getOpcode();
- CSEMap[Opcode].push_back(MI);
- }
+ for (MachineInstr &MI : *BB)
+ CSEMap[MI.getOpcode()].push_back(&MI);
}
/// Find an instruction amount PrevMIs that is a duplicate of MI.
@@ -1242,11 +1221,10 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
const MachineInstr*
MachineLICM::LookForDuplicate(const MachineInstr *MI,
std::vector<const MachineInstr*> &PrevMIs) {
- for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
- const MachineInstr *PrevMI = PrevMIs[i];
+ for (const MachineInstr *PrevMI : PrevMIs)
if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : nullptr)))
return PrevMI;
- }
+
return nullptr;
}
@@ -1296,8 +1274,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
}
}
- for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
- unsigned Idx = Defs[i];
+ for (unsigned Idx : Defs) {
unsigned Reg = MI->getOperand(Idx).getReg();
unsigned DupReg = Dup->getOperand(Idx).getReg();
MRI->replaceRegWith(Reg, DupReg);
@@ -1370,11 +1347,9 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// Clear the kill flags of any register this instruction defines,
// since they may need to be live throughout the entire loop
// rather than just live for part of it.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (MachineOperand &MO : MI->operands())
if (MO.isReg() && MO.isDef() && !MO.isDead())
MRI->clearKillFlags(MO.getReg());
- }
// Add to the CSE map.
if (CI != CSEMap.end())
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
index cdcd8eb..428295e 100644
--- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1736,7 +1736,7 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
// Check the LI only has one connected component.
ConnectedVNInfoEqClasses ConEQ(*LiveInts);
- unsigned NumComp = ConEQ.Classify(&LI);
+ unsigned NumComp = ConEQ.Classify(LI);
if (NumComp > 1) {
report("Multiple connected components in live interval", MF);
report_context(LI);
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
index e7b3217..c1ff13e 100644
--- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -2874,7 +2874,7 @@ void RegisterCoalescer::joinAllIntervals() {
std::vector<MBBPriorityInfo> MBBs;
MBBs.reserve(MF->size());
- for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
MachineBasicBlock *MBB = &*I;
MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
JoinSplitEdges && isSplitEdge(MBB)));
diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
index 3749b1d..f33dc3e 100644
--- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -313,21 +313,6 @@ static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) {
namespace {
-/// List of register defined and used by a machine instruction.
-class RegisterOperands {
-public:
- SmallVector<unsigned, 8> Uses;
- SmallVector<unsigned, 8> Defs;
- SmallVector<unsigned, 8> DeadDefs;
-
- void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI,
- const MachineRegisterInfo &MRI, bool IgnoreDead = false);
-
- /// Use liveness information to find dead defs not marked with a dead flag
- /// and move them to the DeadDefs vector.
- void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS);
-};
-
/// Collect this instruction's unique uses and defs into SmallVectors for
/// processing defs and uses in order.
///
@@ -385,9 +370,11 @@ class RegisterOperandsCollector {
}
}
- friend class RegisterOperands;
+ friend class llvm::RegisterOperands;
};
+} // namespace
+
void RegisterOperands::collect(const MachineInstr &MI,
const TargetRegisterInfo &TRI,
const MachineRegisterInfo &MRI,
@@ -417,8 +404,6 @@ void RegisterOperands::detectDeadDefs(const MachineInstr &MI,
}
}
-} // namespace
-
/// Initialize an array of N PressureDiffs.
void PressureDiffs::init(unsigned N) {
Size = N;
@@ -431,6 +416,18 @@ void PressureDiffs::init(unsigned N) {
PDiffArray = reinterpret_cast<PressureDiff*>(calloc(N, sizeof(PressureDiff)));
}
+void PressureDiffs::addInstruction(unsigned Idx,
+ const RegisterOperands &RegOpers,
+ const MachineRegisterInfo &MRI) {
+ PressureDiff &PDiff = (*this)[Idx];
+ assert(!PDiff.begin()->isValid() && "stale PDiff");
+ for (unsigned Reg : RegOpers.Defs)
+ PDiff.addPressureChange(Reg, true, &MRI);
+
+ for (unsigned Reg : RegOpers.Uses)
+ PDiff.addPressureChange(Reg, false, &MRI);
+}
+
/// Add a change in pressure to the pressure diff of a given instruction.
void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
const MachineRegisterInfo *MRI) {
@@ -467,18 +464,6 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
}
}
-/// Record the pressure difference induced by the given operand list.
-static void collectPDiff(PressureDiff &PDiff, RegisterOperands &RegOpers,
- const MachineRegisterInfo *MRI) {
- assert(!PDiff.begin()->isValid() && "stale PDiff");
-
- for (unsigned Reg : RegOpers.Defs)
- PDiff.addPressureChange(Reg, true, MRI);
-
- for (unsigned Reg : RegOpers.Uses)
- PDiff.addPressureChange(Reg, false, MRI);
-}
-
/// Force liveness of registers.
void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) {
for (unsigned Reg : Regs) {
@@ -514,39 +499,10 @@ void RegPressureTracker::discoverLiveOut(unsigned Reg) {
/// registers that are both defined and used by the instruction. If a pressure
/// difference pointer is provided record the changes is pressure caused by this
/// instruction independent of liveness.
-void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
- PressureDiff *PDiff) {
- assert(CurrPos != MBB->begin());
- if (!isBottomClosed())
- closeBottom();
-
- // Open the top of the region using block iterators.
- if (!RequireIntervals && isTopClosed())
- static_cast<RegionPressure&>(P).openTop(CurrPos);
-
- // Find the previous instruction.
- do
- --CurrPos;
- while (CurrPos != MBB->begin() && CurrPos->isDebugValue());
+void RegPressureTracker::recede(const RegisterOperands &RegOpers,
+ SmallVectorImpl<unsigned> *LiveUses) {
assert(!CurrPos->isDebugValue());
- SlotIndex SlotIdx;
- if (RequireIntervals)
- SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
-
- // Open the top of the region using slot indexes.
- if (RequireIntervals && isTopClosed())
- static_cast<IntervalPressure&>(P).openTop(SlotIdx);
-
- const MachineInstr &MI = *CurrPos;
- RegisterOperands RegOpers;
- RegOpers.collect(MI, *TRI, *MRI);
- if (RequireIntervals)
- RegOpers.detectDeadDefs(MI, *LIS);
-
- if (PDiff)
- collectPDiff(*PDiff, RegOpers, MRI);
-
// Boost pressure for all dead defs together.
increaseRegPressure(RegOpers.DeadDefs);
decreaseRegPressure(RegOpers.DeadDefs);
@@ -560,6 +516,10 @@ void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
discoverLiveOut(Reg);
}
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
+
// Generate liveness for uses.
for (unsigned Reg : RegOpers.Uses) {
if (!LiveRegs.contains(Reg)) {
@@ -586,6 +546,41 @@ void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
}
}
+void RegPressureTracker::recedeSkipDebugValues() {
+ assert(CurrPos != MBB->begin());
+ if (!isBottomClosed())
+ closeBottom();
+
+ // Open the top of the region using block iterators.
+ if (!RequireIntervals && isTopClosed())
+ static_cast<RegionPressure&>(P).openTop(CurrPos);
+
+ // Find the previous instruction.
+ do
+ --CurrPos;
+ while (CurrPos != MBB->begin() && CurrPos->isDebugValue());
+
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
+
+ // Open the top of the region using slot indexes.
+ if (RequireIntervals && isTopClosed())
+ static_cast<IntervalPressure&>(P).openTop(SlotIdx);
+}
+
+void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses) {
+ recedeSkipDebugValues();
+
+ const MachineInstr &MI = *CurrPos;
+ RegisterOperands RegOpers;
+ RegOpers.collect(MI, *TRI, *MRI);
+ if (RequireIntervals)
+ RegOpers.detectDeadDefs(MI, *LIS);
+
+ recede(RegOpers, LiveUses);
+}
+
/// Advance across the current instruction.
void RegPressureTracker::advance() {
assert(!TrackUntiedDefs && "unsupported mode");
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index fb82ab7..11b246a 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -896,11 +896,16 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
assert(SU && "No SUnit mapped to this MI");
if (RPTracker) {
- PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : nullptr;
- RPTracker->recede(/*LiveUses=*/nullptr, PDiff);
- assert(RPTracker->getPos() == std::prev(MII) &&
- "RPTracker can't find MI");
collectVRegUses(SU);
+
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, MRI);
+ if (PDiffs != nullptr)
+ PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);
+
+ RPTracker->recedeSkipDebugValues();
+ assert(&*RPTracker->getPos() == MI && "RPTracker in sync");
+ RPTracker->recede(RegOpers);
}
assert(
@@ -1005,6 +1010,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
I->second[i], RejectMemNodes, TrueMemOrderLatency);
}
+ // This call must come after calls to addChainDependency() since it
+ // consumes the 'RejectMemNodes' list that addChainDependency() possibly
+ // adds to.
adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
PendingLoads.clear();
@@ -1086,6 +1094,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
RejectMemNodes);
}
+ // This call must come after calls to addChainDependency() since it
+ // consumes the 'RejectMemNodes' list that addChainDependency() possibly
+ // adds to.
adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
} else if (MI->mayLoad()) {
@@ -1133,13 +1144,16 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
else
NonAliasMemUses[V].push_back(SU);
}
- if (MayAlias)
- adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU,
- RejectMemNodes, /*Latency=*/0);
// Add dependencies on alias and barrier chains, if needed.
if (MayAlias && AliasChain)
addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
RejectMemNodes);
+ if (MayAlias)
+ // This call must come after calls to addChainDependency() since it
+ // consumes the 'RejectMemNodes' list that addChainDependency()
+ // possibly adds to.
+ adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU,
+ RejectMemNodes, /*Latency=*/0);
if (BarrierChain)
BarrierChain->addPred(SDep(SU, SDep::Barrier));
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index bc2405b9..c741982 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7325,6 +7325,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// fold (bitcast (fneg x)) ->
// flipbit = signbit
// (xor (bitcast x) (build_pair flipbit, flipbit))
+ //
// fold (bitcast (fabs x)) ->
// flipbit = (and (extract_element (bitcast x), 0), signbit)
// (xor (bitcast x) (build_pair flipbit, flipbit))
@@ -8794,20 +8795,21 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
ZeroCmp, Zero, RV);
}
+/// copysign(x, fp_extend(y)) -> copysign(x, y)
+/// copysign(x, fp_round(y)) -> copysign(x, y)
static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
- // copysign(x, fp_extend(y)) -> copysign(x, y)
- // copysign(x, fp_round(y)) -> copysign(x, y)
- // Do not optimize out type conversion of f128 type yet.
- // For some target like x86_64, configuration is changed
- // to keep one f128 value in one SSE register, but
- // instruction selection cannot handle FCOPYSIGN on
- // SSE registers yet.
SDValue N1 = N->getOperand(1);
- EVT N1VT = N1->getValueType(0);
- EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
- return (N1.getOpcode() == ISD::FP_EXTEND ||
- N1.getOpcode() == ISD::FP_ROUND) &&
- (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
+ if ((N1.getOpcode() == ISD::FP_EXTEND ||
+ N1.getOpcode() == ISD::FP_ROUND)) {
+ // Do not optimize out type conversion of f128 type yet.
+ // For some targets like x86_64, configuration is changed to keep one f128
+ // value in one SSE register, but instruction selection cannot handle
+ // FCOPYSIGN on SSE registers yet.
+ EVT N1VT = N1->getValueType(0);
+ EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
+ return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
+ }
+ return false;
}
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index b62bd2b..08815ed 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -297,8 +297,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
else if (Personality == EHPersonality::CoreCLR)
calculateClrEHStateNumbers(&fn, EHInfo);
- calculateCatchReturnSuccessorColors(&fn, EHInfo);
-
// Map all BB references in the WinEH data to MBBs.
for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
for (WinEHHandlerType &H : TBME.HandlerArray) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f46767f..5d572c4 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2941,6 +2941,18 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
// This trivially expands to CTLZ.
return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op);
case ISD::CTLZ: {
+ EVT VT = Op.getValueType();
+ unsigned len = VT.getSizeInBits();
+
+ if (TLI.isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
+ EVT SetCCVT = getSetCCResultType(VT);
+ SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
+ return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ DAG.getConstant(len, dl, VT), CTLZ);
+ }
+
// for now, we do this:
// x = x | (x >> 1);
// x = x | (x >> 2);
@@ -2950,9 +2962,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
// return popcount(~x);
//
// Ref: "Hacker's Delight" by Henry Warren
- EVT VT = Op.getValueType();
EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- unsigned len = VT.getSizeInBits();
for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT);
Op = DAG.getNode(ISD::OR, dl, VT, Op,
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index cd114d6..74f80db 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -262,12 +262,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
case TargetLowering::TypePromoteFloat: {
// Convert the promoted float by hand.
- if (NOutVT.bitsEq(NInVT)) {
- SDValue PromotedOp = GetPromotedFloat(InOp);
- SDValue Trunc = DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, PromotedOp);
- return DAG.getNode(ISD::AssertZext, dl, NOutVT, Trunc,
- DAG.getValueType(OutVT));
- }
+ SDValue PromotedOp = GetPromotedFloat(InOp);
+ return DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, PromotedOp);
break;
}
case TargetLowering::TypeExpandInteger:
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e446a93..45ae39a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1205,8 +1205,13 @@ void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
// Figure out the funclet membership for the catchret's successor.
// This will be used by the FuncletLayout pass to determine how to order the
// BB's.
- WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
- const BasicBlock *SuccessorColor = EHInfo->CatchRetSuccessorColorMap[&I];
+ // A 'catchret' returns to the outer scope's color.
+ Value *ParentPad = I.getParentPad();
+ const BasicBlock *SuccessorColor;
+ if (isa<ConstantTokenNone>(ParentPad))
+ SuccessorColor = &FuncInfo.Fn->getEntryBlock();
+ else
+ SuccessorColor = cast<Instruction>(ParentPad)->getParent();
assert(SuccessorColor && "No parent funclet for catchret!");
MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 6547a62..02545a7 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -461,7 +461,9 @@ static void lowerIncomingStatepointValue(SDValue Incoming,
// If the original value was a constant, make sure it gets recorded as
// such in the stackmap. This is required so that the consumer can
// parse any internal format to the deopt state. It also handles null
- // pointers and other constant pointers in GC states
+ // pointers and other constant pointers in GC states. Note the constant
+ // vectors do not appear to actually hit this path and that anything larger
+ // than an i64 value (not type!) will fail asserts here.
pushStackMapConstant(Ops, Builder, C->getSExtValue());
} else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
// This handles allocas as arguments to the statepoint (this is only
@@ -505,27 +507,27 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
#ifndef NDEBUG
// Check that each of the gc pointer and bases we've gotten out of the
- // safepoint is something the strategy thinks might be a pointer into the GC
- // heap. This is basically just here to help catch errors during statepoint
- // insertion. TODO: This should actually be in the Verifier, but we can't get
- // to the GCStrategy from there (yet).
+ // safepoint is something the strategy thinks might be a pointer (or vector
+ // of pointers) into the GC heap. This is basically just here to help catch
+ // errors during statepoint insertion. TODO: This should actually be in the
+ // Verifier, but we can't get to the GCStrategy from there (yet).
GCStrategy &S = Builder.GFI->getStrategy();
for (const Value *V : Bases) {
- auto Opt = S.isGCManagedPointer(V->getType());
+ auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
if (Opt.hasValue()) {
assert(Opt.getValue() &&
"non gc managed base pointer found in statepoint");
}
}
for (const Value *V : Ptrs) {
- auto Opt = S.isGCManagedPointer(V->getType());
+ auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
if (Opt.hasValue()) {
assert(Opt.getValue() &&
"non gc managed derived pointer found in statepoint");
}
}
for (const Value *V : Relocations) {
- auto Opt = S.isGCManagedPointer(V->getType());
+ auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
if (Opt.hasValue()) {
assert(Opt.getValue() && "non gc managed pointer relocated");
}
diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
index f8aa1e2..d361a6c 100644
--- a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -47,6 +47,7 @@
// MachineFrameInfo is updated with this information.
//===----------------------------------------------------------------------===//
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
// To check for profitability.
@@ -263,6 +264,8 @@ MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
if (!IDom)
break;
}
+ if (IDom == &Block)
+ return nullptr;
return IDom;
}
@@ -352,13 +355,9 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) {
// Push Save outside of this loop if immediate dominator is different
// from save block. If immediate dominator is not different, bail out.
- MachineBasicBlock *IDom = FindIDom<>(*Save, Save->predecessors(), *MDT);
- if (IDom != Save)
- Save = IDom;
- else {
- Save = nullptr;
+ Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
+ if (!Save)
break;
- }
} else {
// If the loop does not exit, there is no point in looking
// for a post-dominator outside the loop.
@@ -386,6 +385,41 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
}
}
+/// Check whether the edge (\p SrcBB, \p DestBB) is a backedge according to MLI.
+/// I.e., check if it exists a loop that contains SrcBB and where DestBB is the
+/// loop header.
+static bool isProperBackedge(const MachineLoopInfo &MLI,
+ const MachineBasicBlock *SrcBB,
+ const MachineBasicBlock *DestBB) {
+ for (const MachineLoop *Loop = MLI.getLoopFor(SrcBB); Loop;
+ Loop = Loop->getParentLoop()) {
+ if (Loop->getHeader() == DestBB)
+ return true;
+ }
+ return false;
+}
+
+/// Check if the CFG of \p MF is irreducible.
+static bool isIrreducibleCFG(const MachineFunction &MF,
+ const MachineLoopInfo &MLI) {
+ const MachineBasicBlock *Entry = &*MF.begin();
+ ReversePostOrderTraversal<const MachineBasicBlock *> RPOT(Entry);
+ BitVector VisitedBB(MF.getNumBlockIDs());
+ for (const MachineBasicBlock *MBB : RPOT) {
+ VisitedBB.set(MBB->getNumber());
+ for (const MachineBasicBlock *SuccBB : MBB->successors()) {
+ if (!VisitedBB.test(SuccBB->getNumber()))
+ continue;
+ // We already visited SuccBB, thus MBB->SuccBB must be a backedge.
+ // Check that the head matches what we have in the loop information.
+ // Otherwise, we have an irreducible graph.
+ if (!isProperBackedge(MLI, MBB, SuccBB))
+ return true;
+ }
+ }
+ return false;
+}
+
bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
if (MF.empty() || !isShrinkWrapEnabled(MF))
return false;
@@ -394,6 +428,17 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
init(MF);
+ if (isIrreducibleCFG(MF, *MLI)) {
+ // If MF is irreducible, a block may be in a loop without
+ // MachineLoopInfo reporting it. I.e., we may use the
+ // post-dominance property in loops, which lead to incorrect
+ // results. Moreover, we may miss that the prologue and
+ // epilogue are not in the same loop, leading to unbalanced
+ // construction/deconstruction of the stack frame.
+ DEBUG(dbgs() << "Irreducible CFGs are not supported yet\n");
+ return false;
+ }
+
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
std::unique_ptr<RegScavenger> RS(
TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);
diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp
index 3541b33..7b52038 100644
--- a/contrib/llvm/lib/CodeGen/StackColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp
@@ -43,6 +43,7 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -570,6 +571,14 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
}
}
+ // Update the location of C++ catch objects for the MSVC personality routine.
+ if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
+ for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
+ for (WinEHHandlerType &H : TBME.HandlerArray)
+ if (H.CatchObj.FrameIndex != INT_MAX &&
+ SlotRemap.count(H.CatchObj.FrameIndex))
+ H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex];
+
DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n");
DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n");
DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n");
diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
index 2426c27..886c5f6 100644
--- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -144,10 +144,11 @@ static void addTryBlockMapEntry(WinEHFuncInfo &FuncInfo, int TryLow,
HT.TypeDescriptor = cast<GlobalVariable>(TypeInfo->stripPointerCasts());
HT.Adjectives = cast<ConstantInt>(CPI->getArgOperand(1))->getZExtValue();
HT.Handler = CPI->getParent();
- if (isa<ConstantPointerNull>(CPI->getArgOperand(2)))
- HT.CatchObj.Alloca = nullptr;
+ if (auto *AI =
+ dyn_cast<AllocaInst>(CPI->getArgOperand(2)->stripPointerCasts()))
+ HT.CatchObj.Alloca = AI;
else
- HT.CatchObj.Alloca = cast<AllocaInst>(CPI->getArgOperand(2));
+ HT.CatchObj.Alloca = nullptr;
TBME.HandlerArray.push_back(HT);
}
FuncInfo.TryBlockMap.push_back(TBME);
@@ -664,24 +665,6 @@ void WinEHPrepare::colorFunclets(Function &F) {
}
}
-void llvm::calculateCatchReturnSuccessorColors(const Function *Fn,
- WinEHFuncInfo &FuncInfo) {
- for (const BasicBlock &BB : *Fn) {
- const auto *CatchRet = dyn_cast<CatchReturnInst>(BB.getTerminator());
- if (!CatchRet)
- continue;
- // A 'catchret' returns to the outer scope's color.
- Value *ParentPad = CatchRet->getParentPad();
- const BasicBlock *Color;
- if (isa<ConstantTokenNone>(ParentPad))
- Color = &Fn->getEntryBlock();
- else
- Color = cast<Instruction>(ParentPad)->getParent();
- // Record the catchret successor's funclet membership.
- FuncInfo.CatchRetSuccessorColorMap[CatchRet] = Color;
- }
-}
-
void WinEHPrepare::demotePHIsOnFunclets(Function &F) {
// Strip PHI nodes off of EH pads.
SmallVector<PHINode *, 16> PHINodes;
diff --git a/contrib/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/contrib/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
index c6bfbc0..a9dee7a 100644
--- a/contrib/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
+++ b/contrib/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
@@ -15,6 +15,7 @@
#include "llvm/DebugInfo/Symbolize/DIPrinter.h"
#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/Support/LineIterator.h"
namespace llvm {
namespace symbolize {
@@ -24,7 +25,37 @@ namespace symbolize {
static const char kDILineInfoBadString[] = "<invalid>";
static const char kBadString[] = "??";
-void DIPrinter::printName(const DILineInfo &Info, bool Inlined) {
+// Prints source code around in the FileName the Line.
+void DIPrinter::printContext(std::string FileName, int64_t Line) {
+ if (PrintSourceContext <= 0)
+ return;
+
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
+ MemoryBuffer::getFile(FileName);
+ if (!BufOrErr)
+ return;
+
+ std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get());
+ int64_t FirstLine =
+ std::max(static_cast<int64_t>(1), Line - PrintSourceContext / 2);
+ int64_t LastLine = FirstLine + PrintSourceContext;
+ size_t MaxLineNumberWidth = std::ceil(std::log10(LastLine));
+
+ for (line_iterator I = line_iterator(*Buf, false);
+ !I.is_at_eof() && I.line_number() <= LastLine; ++I) {
+ int64_t L = I.line_number();
+ if (L >= FirstLine && L <= LastLine) {
+ OS << format_decimal(L, MaxLineNumberWidth);
+ if (L == Line)
+ OS << " >: ";
+ else
+ OS << " : ";
+ OS << *I << "\n";
+ }
+ }
+}
+
+void DIPrinter::print(const DILineInfo &Info, bool Inlined) {
if (PrintFunctionNames) {
std::string FunctionName = Info.FunctionName;
if (FunctionName == kDILineInfoBadString)
@@ -38,21 +69,22 @@ void DIPrinter::printName(const DILineInfo &Info, bool Inlined) {
if (Filename == kDILineInfoBadString)
Filename = kBadString;
OS << Filename << ":" << Info.Line << ":" << Info.Column << "\n";
+ printContext(Filename, Info.Line);
}
DIPrinter &DIPrinter::operator<<(const DILineInfo &Info) {
- printName(Info, false);
+ print(Info, false);
return *this;
}
DIPrinter &DIPrinter::operator<<(const DIInliningInfo &Info) {
uint32_t FramesNum = Info.getNumberOfFrames();
if (FramesNum == 0) {
- printName(DILineInfo(), false);
+ print(DILineInfo(), false);
return *this;
}
for (uint32_t i = 0; i < FramesNum; i++)
- printName(Info.getFrame(i), i > 0);
+ print(Info.getFrame(i), i > 0);
return *this;
}
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp
index b931f10..01e829f 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp
@@ -1,4 +1,4 @@
-//===------- OrcTargetSupport.cpp - Target support utilities for Orc ------===//
+//===------ OrcArchSupport.cpp - Architecture specific support code -------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Triple.h"
-#include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h"
+#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h"
#include "llvm/Support/Process.h"
#include <array>
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp
index e519c7f..956daae 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp
@@ -9,7 +9,7 @@
#include "OrcCBindingsStack.h"
-#include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h"
+#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DynamicLibrary.h"
#include <cstdio>
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
index 2e17624..aae6a99 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
@@ -221,7 +221,8 @@ public:
ModuleHandleT addIRModuleLazy(Module* M,
LLVMOrcSymbolResolverFn ExternalResolver,
void *ExternalResolverCtx) {
- return addIRModule(CODLayer, std::move(M), nullptr,
+ return addIRModule(CODLayer, std::move(M),
+ llvm::make_unique<SectionMemoryManager>(),
std::move(ExternalResolver), ExternalResolverCtx);
}
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcError.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcError.cpp
new file mode 100644
index 0000000..e95115e
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcError.cpp
@@ -0,0 +1,57 @@
+//===---------------- OrcError.cpp - Error codes for ORC ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Error codes for ORC.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/OrcError.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+
+using namespace llvm;
+using namespace llvm::orc;
+
+namespace {
+
+class OrcErrorCategory : public std::error_category {
+public:
+ const char *name() const LLVM_NOEXCEPT override { return "orc"; }
+
+ std::string message(int condition) const override {
+ switch (static_cast<OrcErrorCode>(condition)) {
+ case OrcErrorCode::RemoteAllocatorDoesNotExist:
+ return "Remote allocator does not exist";
+ case OrcErrorCode::RemoteAllocatorIdAlreadyInUse:
+ return "Remote allocator Id already in use";
+ case OrcErrorCode::RemoteMProtectAddrUnrecognized:
+ return "Remote mprotect call references unallocated memory";
+ case OrcErrorCode::RemoteIndirectStubsOwnerDoesNotExist:
+ return "Remote indirect stubs owner does not exist";
+ case OrcErrorCode::RemoteIndirectStubsOwnerIdAlreadyInUse:
+ return "Remote indirect stubs owner Id already in use";
+ case OrcErrorCode::UnexpectedRPCCall:
+ return "Unexpected RPC call";
+ }
+ llvm_unreachable("Unhandled error code");
+ }
+};
+
+static ManagedStatic<OrcErrorCategory> OrcErrCat;
+}
+
+namespace llvm {
+namespace orc {
+
+std::error_code orcError(OrcErrorCode ErrCode) {
+ typedef std::underlying_type<OrcErrorCode>::type UT;
+ return std::error_code(static_cast<UT>(ErrCode), *OrcErrCat);
+}
+}
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index 38a27cf..2ab70a9 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -54,10 +54,13 @@ class OrcMCJITReplacement : public ExecutionEngine {
return Addr;
}
- void reserveAllocationSpace(uintptr_t CodeSize, uintptr_t DataSizeRO,
- uintptr_t DataSizeRW) override {
- return ClientMM->reserveAllocationSpace(CodeSize, DataSizeRO,
- DataSizeRW);
+ void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign,
+ uintptr_t RODataSize, uint32_t RODataAlign,
+ uintptr_t RWDataSize,
+ uint32_t RWDataAlign) override {
+ return ClientMM->reserveAllocationSpace(CodeSize, CodeAlign,
+ RODataSize, RODataAlign,
+ RWDataSize, RWDataAlign);
}
bool needsToReserveAllocationSpace() override {
@@ -74,6 +77,11 @@ class OrcMCJITReplacement : public ExecutionEngine {
return ClientMM->deregisterEHFrames(Addr, LoadAddr, Size);
}
+ void notifyObjectLoaded(RuntimeDyld &RTDyld,
+ const object::ObjectFile &O) override {
+ return ClientMM->notifyObjectLoaded(RTDyld, O);
+ }
+
void notifyObjectLoaded(ExecutionEngine *EE,
const object::ObjectFile &O) override {
return ClientMM->notifyObjectLoaded(EE, O);
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp
new file mode 100644
index 0000000..064633b
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp
@@ -0,0 +1,83 @@
+//===------- OrcRemoteTargetRPCAPI.cpp - ORC Remote API utilities ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h"
+
+namespace llvm {
+namespace orc {
+namespace remote {
+
+const char *OrcRemoteTargetRPCAPI::getJITProcIdName(JITProcId Id) {
+ switch (Id) {
+ case InvalidId:
+ return "*** Invalid JITProcId ***";
+ case CallIntVoidId:
+ return "CallIntVoid";
+ case CallIntVoidResponseId:
+ return "CallIntVoidResponse";
+ case CallMainId:
+ return "CallMain";
+ case CallMainResponseId:
+ return "CallMainResponse";
+ case CallVoidVoidId:
+ return "CallVoidVoid";
+ case CallVoidVoidResponseId:
+ return "CallVoidVoidResponse";
+ case CreateRemoteAllocatorId:
+ return "CreateRemoteAllocator";
+ case CreateIndirectStubsOwnerId:
+ return "CreateIndirectStubsOwner";
+ case DestroyRemoteAllocatorId:
+ return "DestroyRemoteAllocator";
+ case DestroyIndirectStubsOwnerId:
+ return "DestroyIndirectStubsOwner";
+ case EmitIndirectStubsId:
+ return "EmitIndirectStubs";
+ case EmitIndirectStubsResponseId:
+ return "EmitIndirectStubsResponse";
+ case EmitResolverBlockId:
+ return "EmitResolverBlock";
+ case EmitTrampolineBlockId:
+ return "EmitTrampolineBlock";
+ case EmitTrampolineBlockResponseId:
+ return "EmitTrampolineBlockResponse";
+ case GetSymbolAddressId:
+ return "GetSymbolAddress";
+ case GetSymbolAddressResponseId:
+ return "GetSymbolAddressResponse";
+ case GetRemoteInfoId:
+ return "GetRemoteInfo";
+ case GetRemoteInfoResponseId:
+ return "GetRemoteInfoResponse";
+ case ReadMemId:
+ return "ReadMem";
+ case ReadMemResponseId:
+ return "ReadMemResponse";
+ case ReserveMemId:
+ return "ReserveMem";
+ case ReserveMemResponseId:
+ return "ReserveMemResponse";
+ case RequestCompileId:
+ return "RequestCompile";
+ case RequestCompileResponseId:
+ return "RequestCompileResponse";
+ case SetProtectionsId:
+ return "SetProtections";
+ case TerminateSessionId:
+ return "TerminateSession";
+ case WriteMemId:
+ return "WriteMem";
+ case WritePtrId:
+ return "WritePtr";
+ };
+ return nullptr;
+}
+}
+}
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index a95f3bb..d16b2db 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -146,9 +146,12 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
// Compute the memory size required to load all sections to be loaded
// and pass this information to the memory manager
if (MemMgr.needsToReserveAllocationSpace()) {
- uint64_t CodeSize = 0, DataSizeRO = 0, DataSizeRW = 0;
- computeTotalAllocSize(Obj, CodeSize, DataSizeRO, DataSizeRW);
- MemMgr.reserveAllocationSpace(CodeSize, DataSizeRO, DataSizeRW);
+ uint64_t CodeSize = 0, RODataSize = 0, RWDataSize = 0;
+ uint32_t CodeAlign = 1, RODataAlign = 1, RWDataAlign = 1;
+ computeTotalAllocSize(Obj, CodeSize, CodeAlign, RODataSize, RODataAlign,
+ RWDataSize, RWDataAlign);
+ MemMgr.reserveAllocationSpace(CodeSize, CodeAlign, RODataSize, RODataAlign,
+ RWDataSize, RWDataAlign);
}
// Used sections from the object file
@@ -335,13 +338,15 @@ static bool isZeroInit(const SectionRef Section) {
// sections
void RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
uint64_t &CodeSize,
- uint64_t &DataSizeRO,
- uint64_t &DataSizeRW) {
+ uint32_t &CodeAlign,
+ uint64_t &RODataSize,
+ uint32_t &RODataAlign,
+ uint64_t &RWDataSize,
+ uint32_t &RWDataAlign) {
// Compute the size of all sections required for execution
std::vector<uint64_t> CodeSectionSizes;
std::vector<uint64_t> ROSectionSizes;
std::vector<uint64_t> RWSectionSizes;
- uint64_t MaxAlignment = sizeof(void *);
// Collect sizes of all sections to be loaded;
// also determine the max alignment of all sections
@@ -376,17 +381,15 @@ void RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
SectionSize = 1;
if (IsCode) {
+ CodeAlign = std::max(CodeAlign, Alignment);
CodeSectionSizes.push_back(SectionSize);
} else if (IsReadOnly) {
+ RODataAlign = std::max(RODataAlign, Alignment);
ROSectionSizes.push_back(SectionSize);
} else {
+ RWDataAlign = std::max(RWDataAlign, Alignment);
RWSectionSizes.push_back(SectionSize);
}
-
- // update the max alignment
- if (Alignment > MaxAlignment) {
- MaxAlignment = Alignment;
- }
}
}
@@ -410,9 +413,9 @@ void RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
// allocated with the max alignment. Note that we cannot compute with the
// individual alignments of the sections, because then the required size
// depends on the order, in which the sections are allocated.
- CodeSize = computeAllocationSizeForSections(CodeSectionSizes, MaxAlignment);
- DataSizeRO = computeAllocationSizeForSections(ROSectionSizes, MaxAlignment);
- DataSizeRW = computeAllocationSizeForSections(RWSectionSizes, MaxAlignment);
+ CodeSize = computeAllocationSizeForSections(CodeSectionSizes, CodeAlign);
+ RODataSize = computeAllocationSizeForSections(ROSectionSizes, RODataAlign);
+ RWDataSize = computeAllocationSizeForSections(RWSectionSizes, RWDataAlign);
}
// compute stub buffer size for the given section
@@ -937,7 +940,9 @@ RuntimeDyld::loadObject(const ObjectFile &Obj) {
if (!Dyld->isCompatibleFile(Obj))
report_fatal_error("Incompatible object format!");
- return Dyld->loadObject(Obj);
+ auto LoadedObjInfo = Dyld->loadObject(Obj);
+ MemMgr.notifyObjectLoaded(*this, Obj);
+ return LoadedObjInfo;
}
void *RuntimeDyld::getSymbolLocalAddress(StringRef Name) const {
@@ -967,6 +972,17 @@ bool RuntimeDyld::hasError() { return Dyld->hasError(); }
StringRef RuntimeDyld::getErrorString() { return Dyld->getErrorString(); }
+void RuntimeDyld::finalizeWithMemoryManagerLocking() {
+ bool MemoryFinalizationLocked = MemMgr.FinalizationLocked;
+ MemMgr.FinalizationLocked = true;
+ resolveRelocations();
+ registerEHFrames();
+ if (!MemoryFinalizationLocked) {
+ MemMgr.finalizeMemory();
+ MemMgr.FinalizationLocked = false;
+ }
+}
+
void RuntimeDyld::registerEHFrames() {
if (Dyld)
Dyld->registerEHFrames();
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index dafd3c8..ab732c6 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -411,8 +411,10 @@ protected:
// \brief Compute an upper bound of the memory that is required to load all
// sections
- void computeTotalAllocSize(const ObjectFile &Obj, uint64_t &CodeSize,
- uint64_t &DataSizeRO, uint64_t &DataSizeRW);
+ void computeTotalAllocSize(const ObjectFile &Obj,
+ uint64_t &CodeSize, uint32_t &CodeAlign,
+ uint64_t &RODataSize, uint32_t &RODataAlign,
+ uint64_t &RWDataSize, uint32_t &RWDataAlign);
// \brief Compute the stub buffer size required for a section
unsigned computeSectionStubBufSize(const ObjectFile &Obj,
diff --git a/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
index e2f2208..1ad5f17 100644
--- a/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
@@ -137,9 +137,6 @@ bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg)
return true;
}
- // Don't allow free memory blocks to be used after setting protection flags.
- RODataMem.FreeMem.clear();
-
// Make read-only data memory read-only.
ec = applyMemoryGroupPermissions(RODataMem,
sys::Memory::MF_READ | sys::Memory::MF_EXEC);
diff --git a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp
index 1ebe9b7..0ce44e1 100644
--- a/contrib/llvm/lib/IR/AsmWriter.cpp
+++ b/contrib/llvm/lib/IR/AsmWriter.cpp
@@ -3121,7 +3121,7 @@ void AssemblyWriter::printMetadataAttachments(
return;
if (MDNames.empty())
- TheModule->getMDKindNames(MDNames);
+ MDs[0].second->getContext().getMDKindNames(MDNames);
for (const auto &I : MDs) {
unsigned Kind = I.first;
diff --git a/contrib/llvm/lib/IR/Core.cpp b/contrib/llvm/lib/IR/Core.cpp
index 7f39c80..591dafa 100644
--- a/contrib/llvm/lib/IR/Core.cpp
+++ b/contrib/llvm/lib/IR/Core.cpp
@@ -1722,7 +1722,7 @@ void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC) {
const char *LLVMGetGC(LLVMValueRef Fn) {
Function *F = unwrap<Function>(Fn);
- return F->hasGC()? F->getGC() : nullptr;
+ return F->hasGC()? F->getGC().c_str() : nullptr;
}
void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
diff --git a/contrib/llvm/lib/IR/Function.cpp b/contrib/llvm/lib/IR/Function.cpp
index cfb40b1..cfdfc40 100644
--- a/contrib/llvm/lib/IR/Function.cpp
+++ b/contrib/llvm/lib/IR/Function.cpp
@@ -366,47 +366,21 @@ void Function::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
setAttributes(PAL);
}
-// Maintain the GC name for each function in an on-the-side table. This saves
-// allocating an additional word in Function for programs which do not use GC
-// (i.e., most programs) at the cost of increased overhead for clients which do
-// use GC.
-static DenseMap<const Function*,PooledStringPtr> *GCNames;
-static StringPool *GCNamePool;
-static ManagedStatic<sys::SmartRWMutex<true> > GCLock;
-
-bool Function::hasGC() const {
- sys::SmartScopedReader<true> Reader(*GCLock);
- return GCNames && GCNames->count(this);
-}
-
-const char *Function::getGC() const {
+const std::string &Function::getGC() const {
assert(hasGC() && "Function has no collector");
- sys::SmartScopedReader<true> Reader(*GCLock);
- return *(*GCNames)[this];
+ return getContext().getGC(*this);
}
-void Function::setGC(const char *Str) {
- sys::SmartScopedWriter<true> Writer(*GCLock);
- if (!GCNamePool)
- GCNamePool = new StringPool();
- if (!GCNames)
- GCNames = new DenseMap<const Function*,PooledStringPtr>();
- (*GCNames)[this] = GCNamePool->intern(Str);
+void Function::setGC(const std::string Str) {
+ setValueSubclassDataBit(14, !Str.empty());
+ getContext().setGC(*this, std::move(Str));
}
void Function::clearGC() {
- sys::SmartScopedWriter<true> Writer(*GCLock);
- if (GCNames) {
- GCNames->erase(this);
- if (GCNames->empty()) {
- delete GCNames;
- GCNames = nullptr;
- if (GCNamePool->empty()) {
- delete GCNamePool;
- GCNamePool = nullptr;
- }
- }
- }
+ if (!hasGC())
+ return;
+ getContext().deleteGC(*this);
+ setValueSubclassDataBit(14, false);
}
/// Copy all additional attributes (those not needed to create a Function) from
diff --git a/contrib/llvm/lib/IR/IRPrintingPasses.cpp b/contrib/llvm/lib/IR/IRPrintingPasses.cpp
index c1ac336..822dbeb 100644
--- a/contrib/llvm/lib/IR/IRPrintingPasses.cpp
+++ b/contrib/llvm/lib/IR/IRPrintingPasses.cpp
@@ -28,7 +28,13 @@ PrintModulePass::PrintModulePass(raw_ostream &OS, const std::string &Banner,
PreservedAnalyses PrintModulePass::run(Module &M) {
OS << Banner;
- M.print(OS, nullptr, ShouldPreserveUseListOrder);
+ if (llvm::isFunctionInPrintList("*"))
+ M.print(OS, nullptr, ShouldPreserveUseListOrder);
+ else {
+ for(const auto &F : M.functions())
+ if (llvm::isFunctionInPrintList(F.getName()))
+ F.print(OS);
+ }
return PreservedAnalyses::all();
}
@@ -37,7 +43,8 @@ PrintFunctionPass::PrintFunctionPass(raw_ostream &OS, const std::string &Banner)
: OS(OS), Banner(Banner) {}
PreservedAnalyses PrintFunctionPass::run(Function &F) {
- OS << Banner << static_cast<Value &>(F);
+ if (isFunctionInPrintList(F.getName()))
+ OS << Banner << static_cast<Value &>(F);
return PreservedAnalyses::all();
}
diff --git a/contrib/llvm/lib/IR/LLVMContext.cpp b/contrib/llvm/lib/IR/LLVMContext.cpp
index 8848bcb..48b53b0 100644
--- a/contrib/llvm/lib/IR/LLVMContext.cpp
+++ b/contrib/llvm/lib/IR/LLVMContext.cpp
@@ -304,3 +304,19 @@ void LLVMContext::getOperandBundleTags(SmallVectorImpl<StringRef> &Tags) const {
uint32_t LLVMContext::getOperandBundleTagID(StringRef Tag) const {
return pImpl->getOperandBundleTagID(Tag);
}
+
+void LLVMContext::setGC(const Function &Fn, std::string GCName) {
+ auto It = pImpl->GCNames.find(&Fn);
+
+ if (It == pImpl->GCNames.end()) {
+ pImpl->GCNames.insert(std::make_pair(&Fn, std::move(GCName)));
+ return;
+ }
+ It->second = std::move(GCName);
+}
+const std::string &LLVMContext::getGC(const Function &Fn) {
+ return pImpl->GCNames[&Fn];
+}
+void LLVMContext::deleteGC(const Function &Fn) {
+ pImpl->GCNames.erase(&Fn);
+}
diff --git a/contrib/llvm/lib/IR/LLVMContextImpl.h b/contrib/llvm/lib/IR/LLVMContextImpl.h
index a24114d..d42047d 100644
--- a/contrib/llvm/lib/IR/LLVMContextImpl.h
+++ b/contrib/llvm/lib/IR/LLVMContextImpl.h
@@ -1027,6 +1027,13 @@ public:
void getOperandBundleTags(SmallVectorImpl<StringRef> &Tags) const;
uint32_t getOperandBundleTagID(StringRef Tag) const;
+ /// Maintain the GC name for each function.
+ ///
+ /// This saves allocating an additional word in Function for programs which
+ /// do not use GC (i.e., most programs) at the cost of increased overhead for
+ /// clients which do use GC.
+ DenseMap<const Function*, std::string> GCNames;
+
LLVMContextImpl(LLVMContext &C);
~LLVMContextImpl();
diff --git a/contrib/llvm/lib/IR/LegacyPassManager.cpp b/contrib/llvm/lib/IR/LegacyPassManager.cpp
index f2e0c7d..63d89f2 100644
--- a/contrib/llvm/lib/IR/LegacyPassManager.cpp
+++ b/contrib/llvm/lib/IR/LegacyPassManager.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <map>
+#include <unordered_set>
using namespace llvm;
using namespace llvm::legacy;
@@ -83,6 +84,13 @@ PrintAfterAll("print-after-all",
llvm::cl::desc("Print IR after each pass"),
cl::init(false));
+static cl::list<std::string>
+ PrintFuncsList("filter-print-funcs", cl::value_desc("function names"),
+ cl::desc("Only print IR for functions whose name "
+ "match this for all print-[before|after][-all] "
+ "options"),
+ cl::CommaSeparated);
+
/// This is a helper to determine whether to print IR before or
/// after a pass.
@@ -109,6 +117,11 @@ static bool ShouldPrintAfterPass(const PassInfo *PI) {
return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PI, PrintAfter);
}
+bool llvm::isFunctionInPrintList(StringRef FunctionName) {
+ static std::unordered_set<std::string> PrintFuncNames(PrintFuncsList.begin(),
+ PrintFuncsList.end());
+ return PrintFuncNames.empty() || PrintFuncNames.count(FunctionName);
+}
/// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions
/// or higher is specified.
bool PMDataManager::isPassDebuggingExecutionsOrMore() const {
diff --git a/contrib/llvm/lib/IR/Metadata.cpp b/contrib/llvm/lib/IR/Metadata.cpp
index d8eaceb..9a9a501 100644
--- a/contrib/llvm/lib/IR/Metadata.cpp
+++ b/contrib/llvm/lib/IR/Metadata.cpp
@@ -557,7 +557,7 @@ void MDNode::decrementUnresolvedOperandCount() {
resolve();
}
-void MDNode::resolveCycles(bool AllowTemps) {
+void MDNode::resolveRecursivelyImpl(bool AllowTemps) {
if (isResolved())
return;
diff --git a/contrib/llvm/lib/IR/Verifier.cpp b/contrib/llvm/lib/IR/Verifier.cpp
index 6dfb05d..9198b0e 100644
--- a/contrib/llvm/lib/IR/Verifier.cpp
+++ b/contrib/llvm/lib/IR/Verifier.cpp
@@ -45,6 +45,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/Verifier.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -145,6 +146,11 @@ private:
OS << *C;
}
+ template <typename T> void Write(ArrayRef<T> Vs) {
+ for (const T &V : Vs)
+ Write(V);
+ }
+
template <typename T1, typename... Ts>
void WriteTs(const T1 &V1, const Ts &... Vs) {
Write(V1);
@@ -204,6 +210,10 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
/// given function and the largest index passed to llvm.localrecover.
DenseMap<Function *, std::pair<unsigned, unsigned>> FrameEscapeInfo;
+ // Maps catchswitches and cleanuppads that unwind to siblings to the
+ // terminators that indicate the unwind, used to detect cycles therein.
+ MapVector<Instruction *, TerminatorInst *> SiblingFuncletInfo;
+
/// Cache of constants visited in search of ConstantExprs.
SmallPtrSet<const Constant *, 32> ConstantExprVisited;
@@ -245,9 +255,11 @@ public:
Broken = false;
// FIXME: We strip const here because the inst visitor strips const.
visit(const_cast<Function &>(F));
+ verifySiblingFuncletUnwinds();
InstsInThisBlock.clear();
LandingPadResultTy = nullptr;
SawFrameEscape = false;
+ SiblingFuncletInfo.clear();
return !Broken;
}
@@ -403,6 +415,7 @@ private:
void visitCatchPadInst(CatchPadInst &CPI);
void visitCatchReturnInst(CatchReturnInst &CatchReturn);
void visitCleanupPadInst(CleanupPadInst &CPI);
+ void visitFuncletPadInst(FuncletPadInst &FPI);
void visitCatchSwitchInst(CatchSwitchInst &CatchSwitch);
void visitCleanupReturnInst(CleanupReturnInst &CRI);
@@ -428,6 +441,7 @@ private:
void visitConstantExpr(const ConstantExpr *CE);
void VerifyStatepoint(ImmutableCallSite CS);
void verifyFrameRecoverIndices();
+ void verifySiblingFuncletUnwinds();
// Module-level debug info verification...
void verifyTypeRefs();
@@ -984,6 +998,9 @@ void Verifier::visitDIMacro(const DIMacro &N) {
N.getMacinfoType() == dwarf::DW_MACINFO_undef,
"invalid macinfo type", &N);
Assert(!N.getName().empty(), "anonymous macro", &N);
+ if (!N.getValue().empty()) {
+ assert(N.getValue().data()[0] != ' ' && "Macro value has a space prefix");
+ }
}
void Verifier::visitDIMacroFile(const DIMacroFile &N) {
@@ -1693,6 +1710,59 @@ void Verifier::verifyFrameRecoverIndices() {
}
}
+static Instruction *getSuccPad(TerminatorInst *Terminator) {
+ BasicBlock *UnwindDest;
+ if (auto *II = dyn_cast<InvokeInst>(Terminator))
+ UnwindDest = II->getUnwindDest();
+ else if (auto *CSI = dyn_cast<CatchSwitchInst>(Terminator))
+ UnwindDest = CSI->getUnwindDest();
+ else
+ UnwindDest = cast<CleanupReturnInst>(Terminator)->getUnwindDest();
+ return UnwindDest->getFirstNonPHI();
+}
+
+void Verifier::verifySiblingFuncletUnwinds() {
+ SmallPtrSet<Instruction *, 8> Visited;
+ SmallPtrSet<Instruction *, 8> Active;
+ for (const auto &Pair : SiblingFuncletInfo) {
+ Instruction *PredPad = Pair.first;
+ if (Visited.count(PredPad))
+ continue;
+ Active.insert(PredPad);
+ TerminatorInst *Terminator = Pair.second;
+ do {
+ Instruction *SuccPad = getSuccPad(Terminator);
+ if (Active.count(SuccPad)) {
+ // Found a cycle; report error
+ Instruction *CyclePad = SuccPad;
+ SmallVector<Instruction *, 8> CycleNodes;
+ do {
+ CycleNodes.push_back(CyclePad);
+ TerminatorInst *CycleTerminator = SiblingFuncletInfo[CyclePad];
+ if (CycleTerminator != CyclePad)
+ CycleNodes.push_back(CycleTerminator);
+ CyclePad = getSuccPad(CycleTerminator);
+ } while (CyclePad != SuccPad);
+ Assert(false, "EH pads can't handle each other's exceptions",
+ ArrayRef<Instruction *>(CycleNodes));
+ }
+ // Don't re-walk a node we've already checked
+ if (!Visited.insert(SuccPad).second)
+ break;
+ // Walk to this successor if it has a map entry.
+ PredPad = SuccPad;
+ auto TermI = SiblingFuncletInfo.find(PredPad);
+ if (TermI == SiblingFuncletInfo.end())
+ break;
+ Terminator = TermI->second;
+ Active.insert(PredPad);
+ } while (true);
+ // Each node only has one successor, so we've walked all the active
+ // nodes' successors.
+ Active.clear();
+ }
+}
+
// visitFunction - Verify that a function is ok.
//
void Verifier::visitFunction(const Function &F) {
@@ -2892,6 +2962,13 @@ void Verifier::visitInsertValueInst(InsertValueInst &IVI) {
visitInstruction(IVI);
}
+static Value *getParentPad(Value *EHPad) {
+ if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad))
+ return FPI->getParentPad();
+
+ return cast<CatchSwitchInst>(EHPad)->getParentPad();
+}
+
void Verifier::visitEHPadPredecessors(Instruction &I) {
assert(I.isEHPad());
@@ -2919,16 +2996,45 @@ void Verifier::visitEHPadPredecessors(Instruction &I) {
"Block containg CatchPadInst must be jumped to "
"only by its catchswitch.",
CPI);
+ Assert(BB != CPI->getCatchSwitch()->getUnwindDest(),
+ "Catchswitch cannot unwind to one of its catchpads",
+ CPI->getCatchSwitch(), CPI);
return;
}
+ // Verify that each pred has a legal terminator with a legal to/from EH
+ // pad relationship.
+ Instruction *ToPad = &I;
+ Value *ToPadParent = getParentPad(ToPad);
for (BasicBlock *PredBB : predecessors(BB)) {
TerminatorInst *TI = PredBB->getTerminator();
+ Value *FromPad;
if (auto *II = dyn_cast<InvokeInst>(TI)) {
Assert(II->getUnwindDest() == BB && II->getNormalDest() != BB,
- "EH pad must be jumped to via an unwind edge", &I, II);
- } else if (!isa<CleanupReturnInst>(TI) && !isa<CatchSwitchInst>(TI)) {
- Assert(false, "EH pad must be jumped to via an unwind edge", &I, TI);
+ "EH pad must be jumped to via an unwind edge", ToPad, II);
+ if (auto Bundle = II->getOperandBundle(LLVMContext::OB_funclet))
+ FromPad = Bundle->Inputs[0];
+ else
+ FromPad = ConstantTokenNone::get(II->getContext());
+ } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
+ FromPad = CRI->getCleanupPad();
+ Assert(FromPad != ToPadParent, "A cleanupret must exit its cleanup", CRI);
+ } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
+ FromPad = CSI;
+ } else {
+ Assert(false, "EH pad must be jumped to via an unwind edge", ToPad, TI);
+ }
+
+ // The edge may exit from zero or more nested pads.
+ for (;; FromPad = getParentPad(FromPad)) {
+ Assert(FromPad != ToPad,
+ "EH pad cannot handle exceptions raised within it", FromPad, TI);
+ if (FromPad == ToPadParent) {
+ // This is a legal unwind edge.
+ break;
+ }
+ Assert(!isa<ConstantTokenNone>(FromPad),
+ "A single unwind edge may only enter one EH pad", TI);
}
}
}
@@ -2992,7 +3098,7 @@ void Verifier::visitCatchPadInst(CatchPadInst &CPI) {
Assert(BB->getFirstNonPHI() == &CPI,
"CatchPadInst not the first non-PHI instruction in the block.", &CPI);
- visitInstruction(CPI);
+ visitFuncletPadInst(CPI);
}
void Verifier::visitCatchReturnInst(CatchReturnInst &CatchReturn) {
@@ -3022,33 +3128,160 @@ void Verifier::visitCleanupPadInst(CleanupPadInst &CPI) {
Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad),
"CleanupPadInst has an invalid parent.", &CPI);
+ visitFuncletPadInst(CPI);
+}
+
+void Verifier::visitFuncletPadInst(FuncletPadInst &FPI) {
User *FirstUser = nullptr;
- BasicBlock *FirstUnwindDest = nullptr;
- for (User *U : CPI.users()) {
- BasicBlock *UnwindDest;
- if (CleanupReturnInst *CRI = dyn_cast<CleanupReturnInst>(U)) {
- UnwindDest = CRI->getUnwindDest();
- } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) {
- continue;
- } else if (CallSite(U)) {
- continue;
- } else {
- Assert(false, "bogus cleanuppad use", &CPI);
+ Value *FirstUnwindPad = nullptr;
+ SmallVector<FuncletPadInst *, 8> Worklist({&FPI});
+ while (!Worklist.empty()) {
+ FuncletPadInst *CurrentPad = Worklist.pop_back_val();
+ Value *UnresolvedAncestorPad = nullptr;
+ for (User *U : CurrentPad->users()) {
+ BasicBlock *UnwindDest;
+ if (auto *CRI = dyn_cast<CleanupReturnInst>(U)) {
+ UnwindDest = CRI->getUnwindDest();
+ } else if (auto *CSI = dyn_cast<CatchSwitchInst>(U)) {
+ // We allow catchswitch unwind to caller to nest
+ // within an outer pad that unwinds somewhere else,
+ // because catchswitch doesn't have a nounwind variant.
+ // See e.g. SimplifyCFGOpt::SimplifyUnreachable.
+ if (CSI->unwindsToCaller())
+ continue;
+ UnwindDest = CSI->getUnwindDest();
+ } else if (auto *II = dyn_cast<InvokeInst>(U)) {
+ UnwindDest = II->getUnwindDest();
+ } else if (isa<CallInst>(U)) {
+ // Calls which don't unwind may be found inside funclet
+ // pads that unwind somewhere else. We don't *require*
+ // such calls to be annotated nounwind.
+ continue;
+ } else if (auto *CPI = dyn_cast<CleanupPadInst>(U)) {
+ // The unwind dest for a cleanup can only be found by
+ // recursive search. Add it to the worklist, and we'll
+ // search for its first use that determines where it unwinds.
+ Worklist.push_back(CPI);
+ continue;
+ } else {
+ Assert(isa<CatchReturnInst>(U), "Bogus funclet pad use", U);
+ continue;
+ }
+
+ Value *UnwindPad;
+ bool ExitsFPI;
+ if (UnwindDest) {
+ UnwindPad = UnwindDest->getFirstNonPHI();
+ Value *UnwindParent = getParentPad(UnwindPad);
+ // Ignore unwind edges that don't exit CurrentPad.
+ if (UnwindParent == CurrentPad)
+ continue;
+ // Determine whether the original funclet pad is exited,
+ // and if we are scanning nested pads determine how many
+ // of them are exited so we can stop searching their
+ // children.
+ Value *ExitedPad = CurrentPad;
+ ExitsFPI = false;
+ do {
+ if (ExitedPad == &FPI) {
+ ExitsFPI = true;
+ // Now we can resolve any ancestors of CurrentPad up to
+ // FPI, but not including FPI since we need to make sure
+ // to check all direct users of FPI for consistency.
+ UnresolvedAncestorPad = &FPI;
+ break;
+ }
+ Value *ExitedParent = getParentPad(ExitedPad);
+ if (ExitedParent == UnwindParent) {
+ // ExitedPad is the ancestor-most pad which this unwind
+ // edge exits, so we can resolve up to it, meaning that
+ // ExitedParent is the first ancestor still unresolved.
+ UnresolvedAncestorPad = ExitedParent;
+ break;
+ }
+ ExitedPad = ExitedParent;
+ } while (!isa<ConstantTokenNone>(ExitedPad));
+ } else {
+ // Unwinding to caller exits all pads.
+ UnwindPad = ConstantTokenNone::get(FPI.getContext());
+ ExitsFPI = true;
+ UnresolvedAncestorPad = &FPI;
+ }
+
+ if (ExitsFPI) {
+ // This unwind edge exits FPI. Make sure it agrees with other
+ // such edges.
+ if (FirstUser) {
+ Assert(UnwindPad == FirstUnwindPad, "Unwind edges out of a funclet "
+ "pad must have the same unwind "
+ "dest",
+ &FPI, U, FirstUser);
+ } else {
+ FirstUser = U;
+ FirstUnwindPad = UnwindPad;
+ // Record cleanup sibling unwinds for verifySiblingFuncletUnwinds
+ if (isa<CleanupPadInst>(&FPI) && !isa<ConstantTokenNone>(UnwindPad) &&
+ getParentPad(UnwindPad) == getParentPad(&FPI))
+ SiblingFuncletInfo[&FPI] = cast<TerminatorInst>(U);
+ }
+ }
+ // Make sure we visit all uses of FPI, but for nested pads stop as
+ // soon as we know where they unwind to.
+ if (CurrentPad != &FPI)
+ break;
}
+ if (UnresolvedAncestorPad) {
+ if (CurrentPad == UnresolvedAncestorPad) {
+ // When CurrentPad is FPI itself, we don't mark it as resolved even if
+ // we've found an unwind edge that exits it, because we need to verify
+ // all direct uses of FPI.
+ assert(CurrentPad == &FPI);
+ continue;
+ }
+ // Pop off the worklist any nested pads that we've found an unwind
+ // destination for. The pads on the worklist are the uncles,
+ // great-uncles, etc. of CurrentPad. We've found an unwind destination
+ // for all ancestors of CurrentPad up to but not including
+ // UnresolvedAncestorPad.
+ Value *ResolvedPad = CurrentPad;
+ while (!Worklist.empty()) {
+ Value *UnclePad = Worklist.back();
+ Value *AncestorPad = getParentPad(UnclePad);
+ // Walk ResolvedPad up the ancestor list until we either find the
+ // uncle's parent or the last resolved ancestor.
+ while (ResolvedPad != AncestorPad) {
+ Value *ResolvedParent = getParentPad(ResolvedPad);
+ if (ResolvedParent == UnresolvedAncestorPad) {
+ break;
+ }
+ ResolvedPad = ResolvedParent;
+ }
+ // If the resolved ancestor search didn't find the uncle's parent,
+ // then the uncle is not yet resolved.
+ if (ResolvedPad != AncestorPad)
+ break;
+ // This uncle is resolved, so pop it from the worklist.
+ Worklist.pop_back();
+ }
+ }
+ }
- if (!FirstUser) {
- FirstUser = U;
- FirstUnwindDest = UnwindDest;
- } else {
- Assert(
- UnwindDest == FirstUnwindDest,
- "cleanupret instructions from the same cleanuppad must have the same "
- "unwind destination",
- FirstUser, U);
+ if (FirstUnwindPad) {
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(FPI.getParentPad())) {
+ BasicBlock *SwitchUnwindDest = CatchSwitch->getUnwindDest();
+ Value *SwitchUnwindPad;
+ if (SwitchUnwindDest)
+ SwitchUnwindPad = SwitchUnwindDest->getFirstNonPHI();
+ else
+ SwitchUnwindPad = ConstantTokenNone::get(FPI.getContext());
+ Assert(SwitchUnwindPad == FirstUnwindPad,
+ "Unwind edges out of a catch must have the same unwind dest as "
+ "the parent catchswitch",
+ &FPI, FirstUser, CatchSwitch);
}
}
- visitInstruction(CPI);
+ visitInstruction(FPI);
}
void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) {
@@ -3067,17 +3300,21 @@ void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) {
"CatchSwitchInst not the first non-PHI instruction in the block.",
&CatchSwitch);
+ auto *ParentPad = CatchSwitch.getParentPad();
+ Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad),
+ "CatchSwitchInst has an invalid parent.", ParentPad);
+
if (BasicBlock *UnwindDest = CatchSwitch.getUnwindDest()) {
Instruction *I = UnwindDest->getFirstNonPHI();
Assert(I->isEHPad() && !isa<LandingPadInst>(I),
"CatchSwitchInst must unwind to an EH block which is not a "
"landingpad.",
&CatchSwitch);
- }
- auto *ParentPad = CatchSwitch.getParentPad();
- Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad),
- "CatchSwitchInst has an invalid parent.", ParentPad);
+ // Record catchswitch sibling unwinds for verifySiblingFuncletUnwinds
+ if (getParentPad(I) == ParentPad)
+ SiblingFuncletInfo[&CatchSwitch] = &CatchSwitch;
+ }
Assert(CatchSwitch.getNumHandlers() != 0,
"CatchSwitchInst cannot have empty handler list", &CatchSwitch);
@@ -3652,6 +3889,9 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
case Intrinsic::experimental_gc_relocate: {
Assert(CS.getNumArgOperands() == 3, "wrong number of arguments", CS);
+ Assert(isa<PointerType>(CS.getType()->getScalarType()),
+ "gc.relocate must return a pointer or a vector of pointers", CS);
+
// Check that this relocate is correctly tied to the statepoint
// This is case for relocate on the unwinding path of an invoke statepoint
@@ -3734,17 +3974,20 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
"'gc parameters' section of the statepoint call",
CS);
- // Relocated value must be a pointer type, but gc_relocate does not need to return the
- // same pointer type as the relocated pointer. It can be casted to the correct type later
- // if it's desired. However, they must have the same address space.
+ // Relocated value must be either a pointer type or vector-of-pointer type,
+ // but gc_relocate does not need to return the same pointer type as the
+ // relocated pointer. It can be casted to the correct type later if it's
+ // desired. However, they must have the same address space and 'vectorness'
GCRelocateInst &Relocate = cast<GCRelocateInst>(*CS.getInstruction());
- Assert(Relocate.getDerivedPtr()->getType()->isPointerTy(),
+ Assert(Relocate.getDerivedPtr()->getType()->getScalarType()->isPointerTy(),
"gc.relocate: relocated value must be a gc pointer", CS);
- // gc_relocate return type must be a pointer type, and is verified earlier in
- // VerifyIntrinsicType().
- Assert(cast<PointerType>(CS.getType())->getAddressSpace() ==
- cast<PointerType>(Relocate.getDerivedPtr()->getType())->getAddressSpace(),
+ auto ResultType = CS.getType();
+ auto DerivedType = Relocate.getDerivedPtr()->getType();
+ Assert(ResultType->isVectorTy() == DerivedType->isVectorTy(),
+ "gc.relocate: vector relocates to vector and pointer to pointer", CS);
+ Assert(ResultType->getPointerAddressSpace() ==
+ DerivedType->getPointerAddressSpace(),
"gc.relocate: relocating a pointer shouldn't change its address space", CS);
break;
}
diff --git a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp
index 6baaaa4..66df23b 100644
--- a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -92,7 +92,8 @@ void LTOCodeGenerator::initializeLTOPasses() {
initializeSROALegacyPassPass(R);
initializeSROA_DTPass(R);
initializeSROA_SSAUpPass(R);
- initializeFunctionAttrsPass(R);
+ initializePostOrderFunctionAttrsPass(R);
+ initializeReversePostOrderFunctionAttrsPass(R);
initializeGlobalsAAWrapperPassPass(R);
initializeLICMPass(R);
initializeMergedLoadStoreMotionPass(R);
diff --git a/contrib/llvm/lib/Linker/IRMover.cpp b/contrib/llvm/lib/Linker/IRMover.cpp
index 309690f..8dd59f9 100644
--- a/contrib/llvm/lib/Linker/IRMover.cpp
+++ b/contrib/llvm/lib/Linker/IRMover.cpp
@@ -773,6 +773,16 @@ GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV,
NewGV->setLinkage(GlobalValue::ExternalWeakLinkage);
NewGV->copyAttributesFrom(SGV);
+
+ // Remove these copied constants in case this stays a declaration, since
+ // they point to the source module. If the def is linked the values will
+ // be mapped in during linkFunctionBody.
+ if (auto *NewF = dyn_cast<Function>(NewGV)) {
+ NewF->setPersonalityFn(nullptr);
+ NewF->setPrefixData(nullptr);
+ NewF->setPrologueData(nullptr);
+ }
+
return NewGV;
}
@@ -1211,6 +1221,18 @@ void IRLinker::findNeededSubprograms(ValueToValueMapTy &ValueMap) {
for (unsigned I = 0, E = CompileUnits->getNumOperands(); I != E; ++I) {
auto *CU = cast<DICompileUnit>(CompileUnits->getOperand(I));
assert(CU && "Expected valid compile unit");
+ // Ensure that we don't remove subprograms referenced by DIImportedEntity.
+ // It is not legal to have a DIImportedEntity with a null entity or scope.
+ // FIXME: The DISubprogram for functions not linked in but kept due to
+ // being referenced by a DIImportedEntity should also get their
+ // IsDefinition flag is unset.
+ SmallPtrSet<DISubprogram *, 8> ImportedEntitySPs;
+ for (auto *IE : CU->getImportedEntities()) {
+ if (auto *SP = dyn_cast<DISubprogram>(IE->getEntity()))
+ ImportedEntitySPs.insert(SP);
+ if (auto *SP = dyn_cast<DISubprogram>(IE->getScope()))
+ ImportedEntitySPs.insert(SP);
+ }
for (auto *Op : CU->getSubprograms()) {
// Unless we were doing function importing and deferred metadata linking,
// any needed SPs should have been mapped as they would be reached
@@ -1218,7 +1240,7 @@ void IRLinker::findNeededSubprograms(ValueToValueMapTy &ValueMap) {
// function bodies, or from DILocation on inlined instructions).
assert(!(ValueMap.MD()[Op] && IsMetadataLinkingPostpass) &&
"DISubprogram shouldn't be mapped yet");
- if (!ValueMap.MD()[Op])
+ if (!ValueMap.MD()[Op] && !ImportedEntitySPs.count(Op))
UnneededSubprograms.insert(Op);
}
}
diff --git a/contrib/llvm/lib/Linker/LinkModules.cpp b/contrib/llvm/lib/Linker/LinkModules.cpp
index 9de3be4..6ffa71e 100644
--- a/contrib/llvm/lib/Linker/LinkModules.cpp
+++ b/contrib/llvm/lib/Linker/LinkModules.cpp
@@ -65,9 +65,6 @@ class ModuleLinker {
return Flags & Linker::InternalizeLinkedSymbols;
}
- /// Check if we should promote the given local value to global scope.
- bool doPromoteLocalToGlobal(const GlobalValue *SGV);
-
bool shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest,
const GlobalValue &Src);
@@ -97,11 +94,11 @@ class ModuleLinker {
Module &DstM = Mover.getModule();
// If the source has no name it can't link. If it has local linkage,
// there is no name match-up going on.
- if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(getLinkage(SrcGV)))
+ if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(SrcGV->getLinkage()))
return nullptr;
// Otherwise see if we have a match in the destination module's symtab.
- GlobalValue *DGV = DstM.getNamedValue(getName(SrcGV));
+ GlobalValue *DGV = DstM.getNamedValue(SrcGV->getName());
if (!DGV)
return nullptr;
@@ -116,6 +113,64 @@ class ModuleLinker {
bool linkIfNeeded(GlobalValue &GV);
+ /// Helper method to check if we are importing from the current source
+ /// module.
+ bool isPerformingImport() const { return FunctionsToImport != nullptr; }
+
+ /// If we are importing from the source module, checks if we should
+ /// import SGV as a definition, otherwise import as a declaration.
+ bool doImportAsDefinition(const GlobalValue *SGV);
+
+public:
+ ModuleLinker(IRMover &Mover, Module &SrcM, unsigned Flags,
+ const FunctionInfoIndex *Index = nullptr,
+ DenseSet<const GlobalValue *> *FunctionsToImport = nullptr,
+ DenseMap<unsigned, MDNode *> *ValIDToTempMDMap = nullptr)
+ : Mover(Mover), SrcM(SrcM), Flags(Flags), ImportIndex(Index),
+ FunctionsToImport(FunctionsToImport),
+ ValIDToTempMDMap(ValIDToTempMDMap) {
+ assert((ImportIndex || !FunctionsToImport) &&
+ "Expect a FunctionInfoIndex when importing");
+ // If we have a FunctionInfoIndex but no function to import,
+ // then this is the primary module being compiled in a ThinLTO
+ // backend compilation, and we need to see if it has functions that
+ // may be exported to another backend compilation.
+ if (ImportIndex && !FunctionsToImport)
+ HasExportedFunctions = ImportIndex->hasExportedFunctions(SrcM);
+ assert((ValIDToTempMDMap || !FunctionsToImport) &&
+ "Function importing must provide a ValIDToTempMDMap");
+ }
+
+ bool run();
+};
+
+/// Class to handle necessary GlobalValue changes required by ThinLTO including
+/// linkage changes and any necessary renaming.
+class ThinLTOGlobalProcessing {
+ /// The Module which we are exporting or importing functions from.
+ Module &M;
+
+ /// Function index passed in for function importing/exporting handling.
+ const FunctionInfoIndex *ImportIndex;
+
+ /// Functions to import from this module, all other functions will be
+ /// imported as declarations instead of definitions.
+ DenseSet<const GlobalValue *> *FunctionsToImport;
+
+ /// Set to true if the given FunctionInfoIndex contains any functions
+ /// from this source module, in which case we must conservatively assume
+ /// that any of its functions may be imported into another module
+ /// as part of a different backend compilation process.
+ bool HasExportedFunctions = false;
+
+ /// Populated during ThinLTO global processing with locals promoted
+ /// to global scope in an exporting module, which now need to be linked
+ /// in if calling from the ModuleLinker.
+ SetVector<GlobalValue *> NewExportedValues;
+
+ /// Check if we should promote the given local value to global scope.
+ bool doPromoteLocalToGlobal(const GlobalValue *SGV);
+
/// Helper methods to check if we are importing from or potentially
/// exporting from the current source module.
bool isPerformingImport() const { return FunctionsToImport != nullptr; }
@@ -143,32 +198,30 @@ class ModuleLinker {
GlobalValue::LinkageTypes getLinkage(const GlobalValue *SGV);
public:
- ModuleLinker(IRMover &Mover, Module &SrcM, unsigned Flags,
- const FunctionInfoIndex *Index = nullptr,
- DenseSet<const GlobalValue *> *FunctionsToImport = nullptr,
- DenseMap<unsigned, MDNode *> *ValIDToTempMDMap = nullptr)
- : Mover(Mover), SrcM(SrcM), Flags(Flags), ImportIndex(Index),
- FunctionsToImport(FunctionsToImport),
- ValIDToTempMDMap(ValIDToTempMDMap) {
- assert((ImportIndex || !FunctionsToImport) &&
- "Expect a FunctionInfoIndex when importing");
+ ThinLTOGlobalProcessing(
+ Module &M, const FunctionInfoIndex *Index,
+ DenseSet<const GlobalValue *> *FunctionsToImport = nullptr)
+ : M(M), ImportIndex(Index), FunctionsToImport(FunctionsToImport) {
// If we have a FunctionInfoIndex but no function to import,
// then this is the primary module being compiled in a ThinLTO
// backend compilation, and we need to see if it has functions that
// may be exported to another backend compilation.
- if (ImportIndex && !FunctionsToImport)
- HasExportedFunctions = ImportIndex->hasExportedFunctions(SrcM);
- assert((ValIDToTempMDMap || !FunctionsToImport) &&
- "Function importing must provide a ValIDToTempMDMap");
+ if (!FunctionsToImport)
+ HasExportedFunctions = ImportIndex->hasExportedFunctions(M);
}
bool run();
+
+ /// Access the promoted globals that are now exported and need to be linked.
+ SetVector<GlobalValue *> &getNewExportedValues() { return NewExportedValues; }
};
}
-bool ModuleLinker::doImportAsDefinition(const GlobalValue *SGV) {
- if (!isPerformingImport())
- return false;
+/// Checks if we should import SGV as a definition, otherwise import as a
+/// declaration.
+static bool
+doImportAsDefinitionImpl(const GlobalValue *SGV,
+ DenseSet<const GlobalValue *> *FunctionsToImport) {
auto *GA = dyn_cast<GlobalAlias>(SGV);
if (GA) {
if (GA->hasWeakAnyLinkage())
@@ -176,7 +229,7 @@ bool ModuleLinker::doImportAsDefinition(const GlobalValue *SGV) {
const GlobalObject *GO = GA->getBaseObject();
if (!GO->hasLinkOnceODRLinkage())
return false;
- return doImportAsDefinition(GO);
+ return doImportAsDefinitionImpl(GO, FunctionsToImport);
}
// Always import GlobalVariable definitions, except for the special
// case of WeakAny which are imported as ExternalWeak declarations
@@ -196,7 +249,19 @@ bool ModuleLinker::doImportAsDefinition(const GlobalValue *SGV) {
return false;
}
-bool ModuleLinker::doPromoteLocalToGlobal(const GlobalValue *SGV) {
+bool ThinLTOGlobalProcessing::doImportAsDefinition(const GlobalValue *SGV) {
+ if (!isPerformingImport())
+ return false;
+ return doImportAsDefinitionImpl(SGV, FunctionsToImport);
+}
+
+bool ModuleLinker::doImportAsDefinition(const GlobalValue *SGV) {
+ if (!isPerformingImport())
+ return false;
+ return doImportAsDefinitionImpl(SGV, FunctionsToImport);
+}
+
+bool ThinLTOGlobalProcessing::doPromoteLocalToGlobal(const GlobalValue *SGV) {
assert(SGV->hasLocalLinkage());
// Both the imported references and the original local variable must
// be promoted.
@@ -220,7 +285,7 @@ bool ModuleLinker::doPromoteLocalToGlobal(const GlobalValue *SGV) {
return true;
}
-std::string ModuleLinker::getName(const GlobalValue *SGV) {
+std::string ThinLTOGlobalProcessing::getName(const GlobalValue *SGV) {
// For locals that must be promoted to global scope, ensure that
// the promoted name uniquely identifies the copy in the original module,
// using the ID assigned during combined index creation. When importing,
@@ -234,7 +299,8 @@ std::string ModuleLinker::getName(const GlobalValue *SGV) {
return SGV->getName();
}
-GlobalValue::LinkageTypes ModuleLinker::getLinkage(const GlobalValue *SGV) {
+GlobalValue::LinkageTypes
+ThinLTOGlobalProcessing::getLinkage(const GlobalValue *SGV) {
// Any local variable that is referenced by an exported function needs
// to be promoted to global scope. Since we don't currently know which
// functions reference which local variables/functions, we must treat
@@ -298,8 +364,7 @@ GlobalValue::LinkageTypes ModuleLinker::getLinkage(const GlobalValue *SGV) {
// since it would cause global constructors/destructors to be
// executed multiple times. This should have already been handled
// by linkIfNeeded, and we will assert in shouldLinkFromSource
- // if we try to import, so we simply return AppendingLinkage here
- // as this helper is called more widely in getLinkedToGlobal.
+ // if we try to import, so we simply return AppendingLinkage.
return GlobalValue::AppendingLinkage;
case GlobalValue::InternalLinkage:
@@ -652,7 +717,7 @@ void ModuleLinker::addLazyFor(GlobalValue &GV, IRMover::ValueAdder Add) {
}
}
-void ModuleLinker::processGlobalForThinLTO(GlobalValue &GV) {
+void ThinLTOGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
if (GV.hasLocalLinkage() &&
(doPromoteLocalToGlobal(&GV) || isPerformingImport())) {
GV.setName(getName(&GV));
@@ -660,21 +725,26 @@ void ModuleLinker::processGlobalForThinLTO(GlobalValue &GV) {
if (!GV.hasLocalLinkage())
GV.setVisibility(GlobalValue::HiddenVisibility);
if (isModuleExporting())
- ValuesToLink.insert(&GV);
+ NewExportedValues.insert(&GV);
return;
}
GV.setLinkage(getLinkage(&GV));
}
-void ModuleLinker::processGlobalsForThinLTO() {
- for (GlobalVariable &GV : SrcM.globals())
+void ThinLTOGlobalProcessing::processGlobalsForThinLTO() {
+ for (GlobalVariable &GV : M.globals())
processGlobalForThinLTO(GV);
- for (Function &SF : SrcM)
+ for (Function &SF : M)
processGlobalForThinLTO(SF);
- for (GlobalAlias &GA : SrcM.aliases())
+ for (GlobalAlias &GA : M.aliases())
processGlobalForThinLTO(GA);
}
+bool ThinLTOGlobalProcessing::run() {
+ processGlobalsForThinLTO();
+ return false;
+}
+
bool ModuleLinker::run() {
for (const auto &SMEC : SrcM.getComdatSymbolTable()) {
const Comdat &C = SMEC.getValue();
@@ -713,7 +783,14 @@ bool ModuleLinker::run() {
if (linkIfNeeded(GA))
return true;
- processGlobalsForThinLTO();
+ if (ImportIndex) {
+ ThinLTOGlobalProcessing ThinLTOProcessing(SrcM, ImportIndex,
+ FunctionsToImport);
+ if (ThinLTOProcessing.run())
+ return true;
+ for (auto *GV : ThinLTOProcessing.getNewExportedValues())
+ ValuesToLink.insert(GV);
+ }
for (unsigned I = 0; I < ValuesToLink.size(); ++I) {
GlobalValue *GV = ValuesToLink[I];
@@ -786,15 +863,9 @@ bool Linker::linkModules(Module &Dest, std::unique_ptr<Module> Src,
return L.linkInModule(std::move(Src), Flags);
}
-std::unique_ptr<Module>
-llvm::renameModuleForThinLTO(std::unique_ptr<Module> M,
- const FunctionInfoIndex *Index) {
- std::unique_ptr<llvm::Module> RenamedModule(
- new llvm::Module(M->getModuleIdentifier(), M->getContext()));
- Linker L(*RenamedModule.get());
- if (L.linkInModule(std::move(M), llvm::Linker::Flags::None, Index))
- return nullptr;
- return RenamedModule;
+bool llvm::renameModuleForThinLTO(Module &M, const FunctionInfoIndex *Index) {
+ ThinLTOGlobalProcessing ThinLTOProcessing(M, Index);
+ return ThinLTOProcessing.run();
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/MC/MCExpr.cpp b/contrib/llvm/lib/MC/MCExpr.cpp
index 0f26b38..748644b 100644
--- a/contrib/llvm/lib/MC/MCExpr.cpp
+++ b/contrib/llvm/lib/MC/MCExpr.cpp
@@ -300,6 +300,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_Hexagon_LD_PLT: return "LDPLT";
case VK_Hexagon_IE: return "IE";
case VK_Hexagon_IE_GOT: return "IEGOT";
+ case VK_WebAssembly_FUNCTION: return "FUNCTION";
case VK_TPREL: return "tprel";
case VK_DTPREL: return "dtprel";
}
diff --git a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
index 34f49ca..f86f7e4 100644
--- a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -256,6 +256,9 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
DwarfRangesSection =
Ctx->getMachOSection("__DWARF", "__debug_ranges", MachO::S_ATTR_DEBUG,
SectionKind::getMetadata(), "debug_range");
+ DwarfMacinfoSection =
+ Ctx->getMachOSection("__DWARF", "__debug_macinfo", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfDebugInlineSection =
Ctx->getMachOSection("__DWARF", "__debug_inlined", MachO::S_ATTR_DEBUG,
SectionKind::getMetadata());
@@ -505,6 +508,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) {
Ctx->getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0);
DwarfRangesSection =
Ctx->getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0, "debug_range");
+ DwarfMacinfoSection =
+ Ctx->getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0);
// DWARF5 Experimental Debug Info
@@ -684,6 +689,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata(), "debug_range");
+ DwarfMacinfoSection = Ctx->getCOFFSection(
+ ".debug_macinfo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
DwarfInfoDWOSection = Ctx->getCOFFSection(
".debug_info.dwo",
COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
diff --git a/contrib/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
index d0a7daf..972610a 100644
--- a/contrib/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
@@ -64,8 +64,6 @@ void MCObjectStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi,
return;
}
- assert(Hi->getOffset() >= Lo->getOffset() &&
- "Expected Hi to be greater than Lo");
EmitIntValue(Hi->getOffset() - Lo->getOffset(), Size);
}
diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
index a382090..a76cbdb 100644
--- a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -969,9 +969,6 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
Header.PointerToSymbolTable = offset;
- // FIXME: Remove the #else branch and make the #if branch unconditional once
- // LLVM's self host configuration is aware of /Brepro.
-#if (ENABLE_TIMESTAMPS == 1)
// MS LINK expects to be able to use this timestamp to implement their
// /INCREMENTAL feature.
if (Asm.isIncrementalLinkerCompatible()) {
@@ -980,12 +977,9 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
Now = UINT32_MAX;
Header.TimeDateStamp = Now;
} else {
+ // Have deterministic output if /INCREMENTAL isn't needed. Also matches GNU.
Header.TimeDateStamp = 0;
}
-#else
- // We want a deterministic output. It looks like GNU as also writes 0 in here.
- Header.TimeDateStamp = 0;
-#endif
// Write it all to disk...
WriteFileHeader(Header);
diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp
index 1f21117..4cd6aff 100644
--- a/contrib/llvm/lib/Object/COFFObjectFile.cpp
+++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp
@@ -1336,6 +1336,30 @@ ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const {
return std::error_code();
}
+std::error_code ExportDirectoryEntryRef::isForwarder(bool &Result) const {
+ const data_directory *DataEntry;
+ if (auto EC = OwningObject->getDataDirectory(COFF::EXPORT_TABLE, DataEntry))
+ return EC;
+ uint32_t RVA;
+ if (auto EC = getExportRVA(RVA))
+ return EC;
+ uint32_t Begin = DataEntry->RelativeVirtualAddress;
+ uint32_t End = DataEntry->RelativeVirtualAddress + DataEntry->Size;
+ Result = (Begin <= RVA && RVA < End);
+ return std::error_code();
+}
+
+std::error_code ExportDirectoryEntryRef::getForwardTo(StringRef &Result) const {
+ uint32_t RVA;
+ if (auto EC = getExportRVA(RVA))
+ return EC;
+ uintptr_t IntPtr = 0;
+ if (auto EC = OwningObject->getRvaPtr(RVA, IntPtr))
+ return EC;
+ Result = StringRef(reinterpret_cast<const char *>(IntPtr));
+ return std::error_code();
+}
+
bool ImportedSymbolRef::
operator==(const ImportedSymbolRef &Other) const {
return Entry32 == Other.Entry32 && Entry64 == Other.Entry64
diff --git a/contrib/llvm/lib/Object/ELF.cpp b/contrib/llvm/lib/Object/ELF.cpp
index 62c27cc..12b772d 100644
--- a/contrib/llvm/lib/Object/ELF.cpp
+++ b/contrib/llvm/lib/Object/ELF.cpp
@@ -91,6 +91,13 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) {
break;
}
break;
+ case ELF::EM_WEBASSEMBLY:
+ switch (Type) {
+#include "llvm/Support/ELFRelocs/WebAssembly.def"
+ default:
+ break;
+ }
+ break;
default:
break;
}
diff --git a/contrib/llvm/lib/ProfileData/CoverageMapping.cpp b/contrib/llvm/lib/ProfileData/CoverageMapping.cpp
index 55c0fb4..f5d477b 100644
--- a/contrib/llvm/lib/ProfileData/CoverageMapping.cpp
+++ b/contrib/llvm/lib/ProfileData/CoverageMapping.cpp
@@ -517,6 +517,6 @@ class CoverageMappingErrorCategoryType : public std::error_category {
static ManagedStatic<CoverageMappingErrorCategoryType> ErrorCategory;
-const std::error_category &llvm::coveragemap_category() {
+const std::error_category &llvm::coverage::coveragemap_category() {
return *ErrorCategory;
}
diff --git a/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp
index 32c692d..89e1cf4 100644
--- a/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp
+++ b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp
@@ -319,21 +319,14 @@ static std::error_code readCoverageMappingData(
if (Buf + sizeof(CovMapHeader) > End)
return coveragemap_error::malformed;
auto CovHeader = reinterpret_cast<const coverage::CovMapHeader *>(Buf);
- uint32_t NRecords =
- endian::byte_swap<uint32_t, Endian>(CovHeader->NRecords);
- uint32_t FilenamesSize =
- endian::byte_swap<uint32_t, Endian>(CovHeader->FilenamesSize);
- uint32_t CoverageSize =
- endian::byte_swap<uint32_t, Endian>(CovHeader->CoverageSize);
- uint32_t Version = endian::byte_swap<uint32_t, Endian>(CovHeader->Version);
+ uint32_t NRecords = CovHeader->getNRecords<Endian>();
+ uint32_t FilenamesSize = CovHeader->getFilenamesSize<Endian>();
+ uint32_t CoverageSize = CovHeader->getCoverageSize<Endian>();
+ uint32_t Version = CovHeader->getVersion<Endian>();
Buf = reinterpret_cast<const char *>(++CovHeader);
- switch (Version) {
- case CoverageMappingVersion1:
- break;
- default:
+ if (Version > coverage::CoverageMappingCurrentVersion)
return coveragemap_error::unsupported_version;
- }
// Skip past the function records, saving the start and end for later.
const char *FunBuf = Buf;
@@ -364,11 +357,8 @@ static std::error_code readCoverageMappingData(
reinterpret_cast<const coverage::CovMapFunctionRecord<T> *>(FunBuf);
while ((const char *)CFR < FunEnd) {
// Read the function information
- T NamePtr = endian::byte_swap<T, Endian>(CFR->NamePtr);
- uint32_t NameSize = endian::byte_swap<uint32_t, Endian>(CFR->NameSize);
- uint32_t DataSize = endian::byte_swap<uint32_t, Endian>(CFR->DataSize);
- uint64_t FuncHash = endian::byte_swap<uint64_t, Endian>(CFR->FuncHash);
- CFR++;
+ uint32_t DataSize = CFR->template getDataSize<Endian>();
+ uint64_t FuncHash = CFR->template getFuncHash<Endian>();
// Now use that to read the coverage data.
if (CovBuf + DataSize > CovEnd)
@@ -379,16 +369,18 @@ static std::error_code readCoverageMappingData(
// Ignore this record if we already have a record that points to the same
// function name. This is useful to ignore the redundant records for the
// functions with ODR linkage.
- if (!UniqueFunctionMappingData.insert(NamePtr).second)
+ T NameRef = CFR->template getFuncNameRef<Endian>();
+ if (!UniqueFunctionMappingData.insert(NameRef).second)
continue;
- // Finally, grab the name and create a record.
- StringRef FuncName = ProfileNames.getFuncName(NamePtr, NameSize);
- if (NameSize && FuncName.empty())
- return coveragemap_error::malformed;
+ StringRef FuncName;
+ if (std::error_code EC =
+ CFR->template getFuncName<Endian>(ProfileNames, FuncName))
+ return EC;
Records.push_back(BinaryCoverageReader::ProfileMappingRecord(
CoverageMappingVersion(Version), FuncName, FuncHash, Mapping,
FilenamesBegin, Filenames.size() - FilenamesBegin));
+ CFR++;
}
}
diff --git a/contrib/llvm/lib/ProfileData/InstrProf.cpp b/contrib/llvm/lib/ProfileData/InstrProf.cpp
index 027f0f7..d677763 100644
--- a/contrib/llvm/lib/ProfileData/InstrProf.cpp
+++ b/contrib/llvm/lib/ProfileData/InstrProf.cpp
@@ -257,9 +257,8 @@ int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
return 0;
}
-instrprof_error
-InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
- uint64_t Weight) {
+instrprof_error InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord &Input,
+ uint64_t Weight) {
this->sortByTargetValues();
Input.sortByTargetValues();
auto I = ValueData.begin();
@@ -270,14 +269,8 @@ InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
while (I != IE && I->Value < J->Value)
++I;
if (I != IE && I->Value == J->Value) {
- uint64_t JCount = J->Count;
bool Overflowed;
- if (Weight > 1) {
- JCount = SaturatingMultiply(JCount, Weight, &Overflowed);
- if (Overflowed)
- Result = instrprof_error::counter_overflow;
- }
- I->Count = SaturatingAdd(I->Count, JCount, &Overflowed);
+ I->Count = SaturatingMultiplyAdd(J->Count, Weight, I->Count, &Overflowed);
if (Overflowed)
Result = instrprof_error::counter_overflow;
++I;
@@ -288,6 +281,17 @@ InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
return Result;
}
+instrprof_error InstrProfValueSiteRecord::scale(uint64_t Weight) {
+ instrprof_error Result = instrprof_error::success;
+ for (auto I = ValueData.begin(), IE = ValueData.end(); I != IE; ++I) {
+ bool Overflowed;
+ I->Count = SaturatingMultiply(I->Count, Weight, &Overflowed);
+ if (Overflowed)
+ Result = instrprof_error::counter_overflow;
+ }
+ return Result;
+}
+
// Merge Value Profile data from Src record to this record for ValueKind.
// Scale merged value counts by \p Weight.
instrprof_error InstrProfRecord::mergeValueProfData(uint32_t ValueKind,
@@ -303,8 +307,7 @@ instrprof_error InstrProfRecord::mergeValueProfData(uint32_t ValueKind,
Src.getValueSitesForKind(ValueKind);
instrprof_error Result = instrprof_error::success;
for (uint32_t I = 0; I < ThisNumValueSites; I++)
- MergeResult(Result,
- ThisSiteRecords[I].mergeValueData(OtherSiteRecords[I], Weight));
+ MergeResult(Result, ThisSiteRecords[I].merge(OtherSiteRecords[I], Weight));
return Result;
}
@@ -319,13 +322,8 @@ instrprof_error InstrProfRecord::merge(InstrProfRecord &Other,
for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) {
bool Overflowed;
- uint64_t OtherCount = Other.Counts[I];
- if (Weight > 1) {
- OtherCount = SaturatingMultiply(OtherCount, Weight, &Overflowed);
- if (Overflowed)
- Result = instrprof_error::counter_overflow;
- }
- Counts[I] = SaturatingAdd(Counts[I], OtherCount, &Overflowed);
+ Counts[I] =
+ SaturatingMultiplyAdd(Other.Counts[I], Weight, Counts[I], &Overflowed);
if (Overflowed)
Result = instrprof_error::counter_overflow;
}
@@ -336,6 +334,32 @@ instrprof_error InstrProfRecord::merge(InstrProfRecord &Other,
return Result;
}
+instrprof_error InstrProfRecord::scaleValueProfData(uint32_t ValueKind,
+ uint64_t Weight) {
+ uint32_t ThisNumValueSites = getNumValueSites(ValueKind);
+ std::vector<InstrProfValueSiteRecord> &ThisSiteRecords =
+ getValueSitesForKind(ValueKind);
+ instrprof_error Result = instrprof_error::success;
+ for (uint32_t I = 0; I < ThisNumValueSites; I++)
+ MergeResult(Result, ThisSiteRecords[I].scale(Weight));
+ return Result;
+}
+
+instrprof_error InstrProfRecord::scale(uint64_t Weight) {
+ instrprof_error Result = instrprof_error::success;
+ for (auto &Count : this->Counts) {
+ bool Overflowed;
+ Count = SaturatingMultiply(Count, Weight, &Overflowed);
+ if (Overflowed && Result == instrprof_error::success) {
+ Result = instrprof_error::counter_overflow;
+ }
+ }
+ for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+ MergeResult(Result, scaleValueProfData(Kind, Weight));
+
+ return Result;
+}
+
// Map indirect call target name hash to name string.
uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind,
ValueMapType *ValueMap) {
diff --git a/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp b/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp
index 9bb03e1..f522724 100644
--- a/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -104,27 +104,21 @@ std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I,
ProfileDataMap.insert(std::make_pair(I.Hash, InstrProfRecord()));
InstrProfRecord &Dest = Where->second;
- instrprof_error Result;
+ instrprof_error Result = instrprof_error::success;
if (NewFunc) {
// We've never seen a function with this name and hash, add it.
Dest = std::move(I);
// Fix up the name to avoid dangling reference.
Dest.Name = FunctionData.find(Dest.Name)->getKey();
- Result = instrprof_error::success;
- if (Weight > 1) {
- for (auto &Count : Dest.Counts) {
- bool Overflowed;
- Count = SaturatingMultiply(Count, Weight, &Overflowed);
- if (Overflowed && Result == instrprof_error::success) {
- Result = instrprof_error::counter_overflow;
- }
- }
- }
+ if (Weight > 1)
+ Result = Dest.scale(Weight);
} else {
// We're updating a function we've seen before.
Result = Dest.merge(I, Weight);
}
+ Dest.sortValueData();
+
// We keep track of the max function count as we go for simplicity.
// Update this statistic no matter the result of the merge.
if (Dest.Counts[0] > MaxFunctionCount)
diff --git a/contrib/llvm/lib/Support/Debug.cpp b/contrib/llvm/lib/Support/Debug.cpp
index 47751fc..323d532 100644
--- a/contrib/llvm/lib/Support/Debug.cpp
+++ b/contrib/llvm/lib/Support/Debug.cpp
@@ -95,7 +95,10 @@ struct DebugOnlyOpt {
if (Val.empty())
return;
DebugFlag = true;
- CurrentDebugType->push_back(Val);
+ SmallVector<StringRef,8> dbgTypes;
+ StringRef(Val).split(dbgTypes, ',', -1, false);
+ for (auto dbgType : dbgTypes)
+ CurrentDebugType->push_back(dbgType);
}
};
@@ -104,10 +107,9 @@ struct DebugOnlyOpt {
static DebugOnlyOpt DebugOnlyOptLoc;
static cl::opt<DebugOnlyOpt, true, cl::parser<std::string> >
-DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"),
+DebugOnly("debug-only", cl::desc("Enable a specific type of debug output (comma separated list of types)"),
cl::Hidden, cl::ZeroOrMore, cl::value_desc("debug string"),
cl::location(DebugOnlyOptLoc), cl::ValueRequired);
-
// Signal handlers - dump debug output on termination.
static void debug_user_sig_handler(void *Cookie) {
// This is a bit sneaky. Since this is under #ifndef NDEBUG, we
diff --git a/contrib/llvm/lib/Support/IntEqClasses.cpp b/contrib/llvm/lib/Support/IntEqClasses.cpp
index 1134495..ff21357 100644
--- a/contrib/llvm/lib/Support/IntEqClasses.cpp
+++ b/contrib/llvm/lib/Support/IntEqClasses.cpp
@@ -29,7 +29,7 @@ void IntEqClasses::grow(unsigned N) {
EC.push_back(EC.size());
}
-void IntEqClasses::join(unsigned a, unsigned b) {
+unsigned IntEqClasses::join(unsigned a, unsigned b) {
assert(NumClasses == 0 && "join() called after compress().");
unsigned eca = EC[a];
unsigned ecb = EC[b];
@@ -41,6 +41,8 @@ void IntEqClasses::join(unsigned a, unsigned b) {
EC[b] = eca, b = ecb, ecb = EC[b];
else
EC[a] = ecb, a = eca, eca = EC[a];
+
+ return eca;
}
unsigned IntEqClasses::findLeader(unsigned a) const {
diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp
index 3bb1116..0e5d3ac 100644
--- a/contrib/llvm/lib/Support/Triple.cpp
+++ b/contrib/llvm/lib/Support/Triple.cpp
@@ -1154,8 +1154,6 @@ Triple Triple::get32BitArchVariant() const {
Triple T(*this);
switch (getArch()) {
case Triple::UnknownArch:
- case Triple::aarch64:
- case Triple::aarch64_be:
case Triple::amdgcn:
case Triple::avr:
case Triple::bpfel:
@@ -1191,17 +1189,19 @@ Triple Triple::get32BitArchVariant() const {
// Already 32-bit.
break;
- case Triple::le64: T.setArch(Triple::le32); break;
- case Triple::mips64: T.setArch(Triple::mips); break;
- case Triple::mips64el: T.setArch(Triple::mipsel); break;
- case Triple::nvptx64: T.setArch(Triple::nvptx); break;
- case Triple::ppc64: T.setArch(Triple::ppc); break;
- case Triple::sparcv9: T.setArch(Triple::sparc); break;
- case Triple::x86_64: T.setArch(Triple::x86); break;
- case Triple::amdil64: T.setArch(Triple::amdil); break;
- case Triple::hsail64: T.setArch(Triple::hsail); break;
- case Triple::spir64: T.setArch(Triple::spir); break;
- case Triple::wasm64: T.setArch(Triple::wasm32); break;
+ case Triple::aarch64: T.setArch(Triple::arm); break;
+ case Triple::aarch64_be: T.setArch(Triple::armeb); break;
+ case Triple::le64: T.setArch(Triple::le32); break;
+ case Triple::mips64: T.setArch(Triple::mips); break;
+ case Triple::mips64el: T.setArch(Triple::mipsel); break;
+ case Triple::nvptx64: T.setArch(Triple::nvptx); break;
+ case Triple::ppc64: T.setArch(Triple::ppc); break;
+ case Triple::sparcv9: T.setArch(Triple::sparc); break;
+ case Triple::x86_64: T.setArch(Triple::x86); break;
+ case Triple::amdil64: T.setArch(Triple::amdil); break;
+ case Triple::hsail64: T.setArch(Triple::hsail); break;
+ case Triple::spir64: T.setArch(Triple::spir); break;
+ case Triple::wasm64: T.setArch(Triple::wasm32); break;
}
return T;
}
@@ -1210,16 +1210,12 @@ Triple Triple::get64BitArchVariant() const {
Triple T(*this);
switch (getArch()) {
case Triple::UnknownArch:
- case Triple::arm:
- case Triple::armeb:
case Triple::avr:
case Triple::hexagon:
case Triple::kalimba:
case Triple::msp430:
case Triple::r600:
case Triple::tce:
- case Triple::thumb:
- case Triple::thumbeb:
case Triple::xcore:
case Triple::sparcel:
case Triple::shave:
@@ -1247,17 +1243,21 @@ Triple Triple::get64BitArchVariant() const {
// Already 64-bit.
break;
- case Triple::le32: T.setArch(Triple::le64); break;
- case Triple::mips: T.setArch(Triple::mips64); break;
- case Triple::mipsel: T.setArch(Triple::mips64el); break;
- case Triple::nvptx: T.setArch(Triple::nvptx64); break;
- case Triple::ppc: T.setArch(Triple::ppc64); break;
- case Triple::sparc: T.setArch(Triple::sparcv9); break;
- case Triple::x86: T.setArch(Triple::x86_64); break;
- case Triple::amdil: T.setArch(Triple::amdil64); break;
- case Triple::hsail: T.setArch(Triple::hsail64); break;
- case Triple::spir: T.setArch(Triple::spir64); break;
- case Triple::wasm32: T.setArch(Triple::wasm64); break;
+ case Triple::arm: T.setArch(Triple::aarch64); break;
+ case Triple::armeb: T.setArch(Triple::aarch64_be); break;
+ case Triple::le32: T.setArch(Triple::le64); break;
+ case Triple::mips: T.setArch(Triple::mips64); break;
+ case Triple::mipsel: T.setArch(Triple::mips64el); break;
+ case Triple::nvptx: T.setArch(Triple::nvptx64); break;
+ case Triple::ppc: T.setArch(Triple::ppc64); break;
+ case Triple::sparc: T.setArch(Triple::sparcv9); break;
+ case Triple::x86: T.setArch(Triple::x86_64); break;
+ case Triple::amdil: T.setArch(Triple::amdil64); break;
+ case Triple::hsail: T.setArch(Triple::hsail64); break;
+ case Triple::spir: T.setArch(Triple::spir64); break;
+ case Triple::thumb: T.setArch(Triple::aarch64); break;
+ case Triple::thumbeb: T.setArch(Triple::aarch64_be); break;
+ case Triple::wasm32: T.setArch(Triple::wasm64); break;
}
return T;
}
diff --git a/contrib/llvm/lib/Support/Windows/Path.inc b/contrib/llvm/lib/Support/Windows/Path.inc
index 4e48412..5ef77b1 100644
--- a/contrib/llvm/lib/Support/Windows/Path.inc
+++ b/contrib/llvm/lib/Support/Windows/Path.inc
@@ -38,6 +38,7 @@ typedef int errno_t;
#ifdef _MSC_VER
# pragma comment(lib, "advapi32.lib") // This provides CryptAcquireContextW.
+# pragma comment(lib, "ole32.lib") // This provides CoTaskMemFree
#endif
using namespace llvm;
diff --git a/contrib/llvm/lib/Support/Windows/Signals.inc b/contrib/llvm/lib/Support/Windows/Signals.inc
index d109a66..f40ca72 100644
--- a/contrib/llvm/lib/Support/Windows/Signals.inc
+++ b/contrib/llvm/lib/Support/Windows/Signals.inc
@@ -405,7 +405,10 @@ static void RegisterHandler() {
// If we cannot load up the APIs (which would be unexpected as they should
// exist on every version of Windows we support), we will bail out since
// there would be nothing to report.
- assert(load64BitDebugHelp() && "These APIs should always be available");
+ if (!load64BitDebugHelp()) {
+ assert(false && "These APIs should always be available");
+ return;
+ }
if (RegisteredUnhandledExceptionFilter) {
EnterCriticalSection(&CriticalSection);
diff --git a/contrib/llvm/lib/Support/Windows/WindowsSupport.h b/contrib/llvm/lib/Support/Windows/WindowsSupport.h
index c65e314..60490f2 100644
--- a/contrib/llvm/lib/Support/Windows/WindowsSupport.h
+++ b/contrib/llvm/lib/Support/Windows/WindowsSupport.h
@@ -47,20 +47,27 @@
#include <string>
#include <vector>
-#if !defined(__CYGWIN__) && !defined(__MINGW32__)
-#include <VersionHelpers.h>
-#else
-// Cygwin does not have the IsWindows8OrGreater() API.
-// Some version of mingw does not have the API either.
-inline bool IsWindows8OrGreater() {
- OSVERSIONINFO osvi = {};
+/// Determines if the program is running on Windows 8 or newer. This
+/// reimplements one of the helpers in the Windows 8.1 SDK, which are intended
+/// to supercede raw calls to GetVersionEx. Old SDKs, Cygwin, and MinGW don't
+/// yet have VersionHelpers.h, so we have our own helper.
+inline bool RunningWindows8OrGreater() {
+ // Windows 8 is version 6.2, service pack 0.
+ OSVERSIONINFOEXW osvi = {};
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
- if (!::GetVersionEx(&osvi))
- return false;
- return (osvi.dwMajorVersion > 6 ||
- (osvi.dwMajorVersion == 6 && osvi.dwMinorVersion >= 2));
+ osvi.dwMajorVersion = 6;
+ osvi.dwMinorVersion = 2;
+ osvi.wServicePackMajor = 0;
+
+ DWORDLONG Mask = 0;
+ Mask = VerSetConditionMask(Mask, VER_MAJORVERSION, VER_GREATER_EQUAL);
+ Mask = VerSetConditionMask(Mask, VER_MINORVERSION, VER_GREATER_EQUAL);
+ Mask = VerSetConditionMask(Mask, VER_SERVICEPACKMAJOR, VER_GREATER_EQUAL);
+
+ return VerifyVersionInfoW(&osvi, VER_MAJORVERSION | VER_MINORVERSION |
+ VER_SERVICEPACKMAJOR,
+ Mask) != FALSE;
}
-#endif // __CYGWIN__
inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
if (!ErrMsg)
diff --git a/contrib/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm/lib/Support/raw_ostream.cpp
index 57162dc..15813fd 100644
--- a/contrib/llvm/lib/Support/raw_ostream.cpp
+++ b/contrib/llvm/lib/Support/raw_ostream.cpp
@@ -577,7 +577,7 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
// Writing a large size of output to Windows console returns ENOMEM. It seems
// that, prior to Windows 8, WriteFile() is redirecting to WriteConsole(), and
// the latter has a size limit (66000 bytes or less, depending on heap usage).
- bool ShouldWriteInChunks = !!::_isatty(FD) && !IsWindows8OrGreater();
+ bool ShouldWriteInChunks = !!::_isatty(FD) && !RunningWindows8OrGreater();
#endif
do {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 3ef3c8b..f398117 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2487,15 +2487,36 @@ static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
return true;
}
-/// Return true when there is potentially a faster code sequence
-/// for an instruction chain ending in \p Root. All potential patterns are
-/// listed
-/// in the \p Pattern vector. Pattern should be sorted in priority order since
-/// the pattern evaluator stops checking as soon as it finds a faster sequence.
+// TODO: There are many more machine instruction opcodes to match:
+// 1. Other data types (integer, vectors)
+// 2. Other math / logic operations (xor, or)
+// 3. Other forms of the same operation (intrinsics and other variants)
+bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
+ switch (Inst.getOpcode()) {
+ case AArch64::FADDDrr:
+ case AArch64::FADDSrr:
+ case AArch64::FADDv2f32:
+ case AArch64::FADDv2f64:
+ case AArch64::FADDv4f32:
+ case AArch64::FMULDrr:
+ case AArch64::FMULSrr:
+ case AArch64::FMULX32:
+ case AArch64::FMULX64:
+ case AArch64::FMULXv2f32:
+ case AArch64::FMULXv2f64:
+ case AArch64::FMULXv4f32:
+ case AArch64::FMULv2f32:
+ case AArch64::FMULv2f64:
+ case AArch64::FMULv4f32:
+ return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
+ default:
+ return false;
+ }
+}
-bool AArch64InstrInfo::getMachineCombinerPatterns(
- MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+/// Find instructions that can be turned into madd.
+static bool getMaddPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) {
unsigned Opc = Root.getOpcode();
MachineBasicBlock &MBB = *Root.getParent();
bool Found = false;
@@ -2600,6 +2621,20 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
return Found;
}
+/// Return true when there is potentially a faster code sequence for an
+/// instruction chain ending in \p Root. All potential patterns are listed in
+/// the \p Pattern vector. Pattern should be sorted in priority order since the
+/// pattern evaluator stops checking as soon as it finds a faster sequence.
+
+bool AArch64InstrInfo::getMachineCombinerPatterns(
+ MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+ if (getMaddPatterns(Root, Patterns))
+ return true;
+
+ return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
+}
+
/// genMadd - Generate madd instruction and combine mul and add.
/// Example:
/// MUL I=A,B,0
@@ -2713,8 +2748,10 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
unsigned Opc;
switch (Pattern) {
default:
- // signal error.
- break;
+ // Reassociate instructions.
+ TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
+ DelInstrs, InstrIdxForVirtReg);
+ return;
case MachineCombinerPattern::MULADDW_OP1:
case MachineCombinerPattern::MULADDX_OP1:
// MUL I=A,B,0
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index ae02822..b5bb446 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -169,7 +169,9 @@ public:
bool getMachineCombinerPatterns(MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern> &Patterns)
const override;
-
+ /// Return true when Inst is associative and commutative so that it can be
+ /// reassociated.
+ bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
/// When getMachineCombinerPatterns() finds patterns, this function generates
/// the instructions that could replace the original code sequence
void genAlternativeCodeSequence(
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
index 8c3cb56..5d00e1c 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -20,8 +20,10 @@ class AMDGPUInstrPrinter;
class AMDGPUSubtarget;
class AMDGPUTargetMachine;
class FunctionPass;
+class MachineSchedContext;
class MCAsmInfo;
class raw_ostream;
+class ScheduleDAGInstrs;
class Target;
class TargetMachine;
@@ -49,6 +51,8 @@ FunctionPass *createSIFixSGPRLiveRangesPass();
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSIInsertWaits(TargetMachine &tm);
+ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C);
+
ModulePass *createAMDGPUAnnotateKernelFeaturesPass();
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
extern char &AMDGPUAnnotateKernelFeaturesID;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 9c37902..1239dfb2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -91,6 +91,25 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {}
+void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
+ return;
+
+ // Need to construct an MCSubtargetInfo here in case we have no functions
+ // in the module.
+ std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
+ TM.getTargetTriple().str(), TM.getTargetCPU(),
+ TM.getTargetFeatureString()));
+
+ AMDGPUTargetStreamer *TS =
+ static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
+
+ TS->EmitDirectiveHSACodeObjectVersion(1, 0);
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
+ TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
+ "AMD", "AMDGPU");
+}
+
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
@@ -148,11 +167,15 @@ void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
TS->EmitAMDGPUHsaProgramScopeGlobal(GV->getName());
}
+ MCSymbolELF *GVSym = cast<MCSymbolELF>(getSymbol(GV));
const DataLayout &DL = getDataLayout();
+
+ // Emit the size
+ uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
+ OutStreamer->emitELFSize(GVSym, MCConstantExpr::create(Size, OutContext));
OutStreamer->PushSection();
OutStreamer->SwitchSection(
getObjFileLowering().SectionForGlobal(GV, *Mang, TM));
- MCSymbol *GVSym = getSymbol(GV);
const Constant *C = GV->getInitializer();
OutStreamer->EmitLabel(GVSym);
EmitGlobalConstant(DL, C);
@@ -178,13 +201,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
if (!STM.isAmdHsaOS()) {
EmitProgramInfoSI(MF, KernelInfo);
}
- // Emit directives
- AMDGPUTargetStreamer *TS =
- static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
- TS->EmitDirectiveHSACodeObjectVersion(1, 0);
- AMDGPU::IsaVersion ISA = STM.getIsaVersion();
- TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
- "AMD", "AMDGPU");
} else {
EmitProgramInfoR600(MF);
}
@@ -417,16 +433,24 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
}
}
- if (VCCUsed || FlatUsed || STM.isXNACKEnabled()) {
- MaxSGPR += 2;
+ unsigned ExtraSGPRs = 0;
- if (FlatUsed)
- MaxSGPR += 2;
+ if (VCCUsed)
+ ExtraSGPRs = 2;
+ if (STM.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (FlatUsed)
+ ExtraSGPRs = 4;
+ } else {
if (STM.isXNACKEnabled())
- MaxSGPR += 2;
+ ExtraSGPRs = 4;
+
+ if (FlatUsed)
+ ExtraSGPRs = 6;
}
+ MaxSGPR += ExtraSGPRs;
+
// We found the maximum register index. They start at 0, so add one to get the
// number of registers.
ProgInfo.NumVGPR = MaxVGPR + 1;
@@ -563,7 +587,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
- OutStreamer->EmitIntValue(MFI->PSInputAddr, 4);
+ OutStreamer->EmitIntValue(MFI->PSInputEna, 4);
+ OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4);
+ OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4);
}
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 817cbfc..99d4091 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -103,6 +103,8 @@ public:
void EmitGlobalVariable(const GlobalVariable *GV) override;
+ void EmitStartOfAsmFile(Module &M) override;
+
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O) override;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 6ffa7a0..b0db261 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -20,28 +20,83 @@ def CC_SI : CallingConv<[
CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
- SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21
+ SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
+ SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
+ SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39
]>>>,
CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow<
- [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
- [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
+ [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14,
+ SGPR16, SGPR18, SGPR20, SGPR22, SGPR24, SGPR26, SGPR28, SGPR30,
+ SGPR32, SGPR34, SGPR36, SGPR38 ],
+ [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15,
+ SGPR17, SGPR19, SGPR21, SGPR23, SGPR25, SGPR27, SGPR29, SGPR31,
+ SGPR33, SGPR35, SGPR37, SGPR39 ]
>>>,
+ // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
CCIfNotInReg<CCIfType<[f32, i32] , CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
- VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
+ VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31,
+ VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39,
+ VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47,
+ VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55,
+ VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63,
+ VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71,
+ VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79,
+ VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87,
+ VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95,
+ VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103,
+ VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111,
+ VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,
+ VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
+ VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
]>>>,
CCIfByVal<CCIfType<[i64] , CCAssignToRegWithShadow<
- [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
- [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
+ [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14,
+ SGPR16, SGPR18, SGPR20, SGPR22, SGPR24, SGPR26, SGPR28, SGPR30,
+ SGPR32, SGPR34, SGPR36, SGPR38 ],
+ [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15,
+ SGPR17, SGPR19, SGPR21, SGPR23, SGPR25, SGPR27, SGPR29, SGPR31,
+ SGPR33, SGPR35, SGPR37, SGPR39 ]
>>>
]>;
+def RetCC_SI : CallingConv<[
+ CCIfType<[i32] , CCAssignToReg<[
+ SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
+ SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
+ SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
+ SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
+ SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39
+ ]>>,
+
+ // 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
+ CCIfType<[f32] , CCAssignToReg<[
+ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
+ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
+ VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
+ VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31,
+ VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39,
+ VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47,
+ VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55,
+ VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63,
+ VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71,
+ VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79,
+ VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87,
+ VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95,
+ VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103,
+ VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111,
+ VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,
+ VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
+ VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
+ ]>>
+]>;
+
// Calling convention for R600
def CC_R600 : CallingConv<[
CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 222f631..1a59a46 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -282,12 +282,19 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
setOperationAction(ISD::SMAX, MVT::i32, Legal);
setOperationAction(ISD::UMAX, MVT::i32, Legal);
- if (!Subtarget->hasFFBH())
+ if (Subtarget->hasFFBH())
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
+ else
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
if (!Subtarget->hasFFBL())
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+
+ setOperationAction(ISD::CTLZ, MVT::i64, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
+
static const MVT::SimpleValueType VectorIntTypes[] = {
MVT::v2i32, MVT::v4i32
};
@@ -565,6 +572,12 @@ void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
}
+void AMDGPUTargetLowering::AnalyzeReturn(CCState &State,
+ const SmallVectorImpl<ISD::OutputArg> &Outs) const {
+
+ State.AnalyzeReturn(Outs, RetCC_SI);
+}
+
SDValue AMDGPUTargetLowering::LowerReturn(
SDValue Chain,
CallingConv::ID CallConv,
@@ -633,6 +646,9 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ return LowerCTLZ(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
}
return Op;
@@ -2159,6 +2175,145 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
}
+SDValue AMDGPUTargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+ bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF;
+
+ if (ZeroUndef && Src.getValueType() == MVT::i32)
+ return DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, Src);
+
+ SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
+
+ const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
+ const SDValue One = DAG.getConstant(1, SL, MVT::i32);
+
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
+
+ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), MVT::i32);
+
+ SDValue Hi0 = DAG.getSetCC(SL, SetCCVT, Hi, Zero, ISD::SETEQ);
+
+ SDValue CtlzLo = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i32, Lo);
+ SDValue CtlzHi = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i32, Hi);
+
+ const SDValue Bits32 = DAG.getConstant(32, SL, MVT::i32);
+ SDValue Add = DAG.getNode(ISD::ADD, SL, MVT::i32, CtlzLo, Bits32);
+
+ // ctlz(x) = hi_32(x) == 0 ? ctlz(lo_32(x)) + 32 : ctlz(hi_32(x))
+ SDValue NewCtlz = DAG.getNode(ISD::SELECT, SL, MVT::i32, Hi0, Add, CtlzHi);
+
+ if (!ZeroUndef) {
+ // Test if the full 64-bit input is zero.
+
+ // FIXME: DAG combines turn what should be an s_and_b64 into a v_or_b32,
+ // which we probably don't want.
+ SDValue Lo0 = DAG.getSetCC(SL, SetCCVT, Lo, Zero, ISD::SETEQ);
+ SDValue SrcIsZero = DAG.getNode(ISD::AND, SL, SetCCVT, Lo0, Hi0);
+
+ // TODO: If i64 setcc is half rate, it can result in 1 fewer instruction
+ // with the same cycles, otherwise it is slower.
+ // SDValue SrcIsZero = DAG.getSetCC(SL, SetCCVT, Src,
+ // DAG.getConstant(0, SL, MVT::i64), ISD::SETEQ);
+
+ const SDValue Bits32 = DAG.getConstant(64, SL, MVT::i32);
+
+ // The instruction returns -1 for 0 input, but the defined intrinsic
+ // behavior is to return the number of bits.
+ NewCtlz = DAG.getNode(ISD::SELECT, SL, MVT::i32,
+ SrcIsZero, Bits32, NewCtlz);
+ }
+
+ return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewCtlz);
+}
+
+SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
+ bool Signed) const {
+ // Unsigned
+ // cul2f(ulong u)
+ //{
+ // uint lz = clz(u);
+ // uint e = (u != 0) ? 127U + 63U - lz : 0;
+ // u = (u << lz) & 0x7fffffffffffffffUL;
+ // ulong t = u & 0xffffffffffUL;
+ // uint v = (e << 23) | (uint)(u >> 40);
+ // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
+ // return as_float(v + r);
+ //}
+ // Signed
+ // cl2f(long l)
+ //{
+ // long s = l >> 63;
+ // float r = cul2f((l + s) ^ s);
+ // return s ? -r : r;
+ //}
+
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+ SDValue L = Src;
+
+ SDValue S;
+ if (Signed) {
+ const SDValue SignBit = DAG.getConstant(63, SL, MVT::i64);
+ S = DAG.getNode(ISD::SRA, SL, MVT::i64, L, SignBit);
+
+ SDValue LPlusS = DAG.getNode(ISD::ADD, SL, MVT::i64, L, S);
+ L = DAG.getNode(ISD::XOR, SL, MVT::i64, LPlusS, S);
+ }
+
+ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), MVT::f32);
+
+
+ SDValue ZeroI32 = DAG.getConstant(0, SL, MVT::i32);
+ SDValue ZeroI64 = DAG.getConstant(0, SL, MVT::i64);
+ SDValue LZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i64, L);
+ LZ = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LZ);
+
+ SDValue K = DAG.getConstant(127U + 63U, SL, MVT::i32);
+ SDValue E = DAG.getSelect(SL, MVT::i32,
+ DAG.getSetCC(SL, SetCCVT, L, ZeroI64, ISD::SETNE),
+ DAG.getNode(ISD::SUB, SL, MVT::i32, K, LZ),
+ ZeroI32);
+
+ SDValue U = DAG.getNode(ISD::AND, SL, MVT::i64,
+ DAG.getNode(ISD::SHL, SL, MVT::i64, L, LZ),
+ DAG.getConstant((-1ULL) >> 1, SL, MVT::i64));
+
+ SDValue T = DAG.getNode(ISD::AND, SL, MVT::i64, U,
+ DAG.getConstant(0xffffffffffULL, SL, MVT::i64));
+
+ SDValue UShl = DAG.getNode(ISD::SRL, SL, MVT::i64,
+ U, DAG.getConstant(40, SL, MVT::i64));
+
+ SDValue V = DAG.getNode(ISD::OR, SL, MVT::i32,
+ DAG.getNode(ISD::SHL, SL, MVT::i32, E, DAG.getConstant(23, SL, MVT::i32)),
+ DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, UShl));
+
+ SDValue C = DAG.getConstant(0x8000000000ULL, SL, MVT::i64);
+ SDValue RCmp = DAG.getSetCC(SL, SetCCVT, T, C, ISD::SETUGT);
+ SDValue TCmp = DAG.getSetCC(SL, SetCCVT, T, C, ISD::SETEQ);
+
+ SDValue One = DAG.getConstant(1, SL, MVT::i32);
+
+ SDValue VTrunc1 = DAG.getNode(ISD::AND, SL, MVT::i32, V, One);
+
+ SDValue R = DAG.getSelect(SL, MVT::i32,
+ RCmp,
+ One,
+ DAG.getSelect(SL, MVT::i32, TCmp, VTrunc1, ZeroI32));
+ R = DAG.getNode(ISD::ADD, SL, MVT::i32, V, R);
+ R = DAG.getNode(ISD::BITCAST, SL, MVT::f32, R);
+
+ if (!Signed)
+ return R;
+
+ SDValue RNeg = DAG.getNode(ISD::FNEG, SL, MVT::f32, R);
+ return DAG.getSelect(SL, MVT::f32, DAG.getSExtOrTrunc(S, SL, SetCCVT), RNeg, R);
+}
+
SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
bool Signed) const {
SDLoc SL(Op);
@@ -2184,35 +2339,29 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
- SDValue S0 = Op.getOperand(0);
- if (S0.getValueType() != MVT::i64)
- return SDValue();
+ assert(Op.getOperand(0).getValueType() == MVT::i64 &&
+ "operation should be legal");
EVT DestVT = Op.getValueType();
if (DestVT == MVT::f64)
return LowerINT_TO_FP64(Op, DAG, false);
- assert(DestVT == MVT::f32);
-
- SDLoc DL(Op);
+ if (DestVT == MVT::f32)
+ return LowerINT_TO_FP32(Op, DAG, false);
- // f32 uint_to_fp i64
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
- DAG.getConstant(0, DL, MVT::i32));
- SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
- DAG.getConstant(1, DL, MVT::i32));
- SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
- // TODO: Should this propagate fast-math-flags?
- FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
- DAG.getConstantFP(4294967296.0f, DL, MVT::f32)); // 2^32
- return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
+ return SDValue();
}
SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
- SDValue Src = Op.getOperand(0);
- if (Src.getValueType() == MVT::i64 && Op.getValueType() == MVT::f64)
+ assert(Op.getOperand(0).getValueType() == MVT::i64 &&
+ "operation should be legal");
+
+ EVT DestVT = Op.getValueType();
+ if (DestVT == MVT::f32)
+ return LowerINT_TO_FP32(Op, DAG, true);
+
+ if (DestVT == MVT::f64)
return LowerINT_TO_FP64(Op, DAG, true);
return SDValue();
@@ -2447,6 +2596,97 @@ SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
return DAG.getSExtOrTrunc(Mul, DL, VT);
}
+static bool isNegativeOne(SDValue Val) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val))
+ return C->isAllOnesValue();
+ return false;
+}
+
+static bool isCtlzOpc(unsigned Opc) {
+ return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF;
+}
+
+// Get FFBH node if the incoming op may have been type legalized from a smaller
+// type VT.
+// Need to match pre-legalized type because the generic legalization inserts the
+// add/sub between the select and compare.
+static SDValue getFFBH_U32(const TargetLowering &TLI,
+ SelectionDAG &DAG, SDLoc SL, SDValue Op) {
+ EVT VT = Op.getValueType();
+ EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ if (LegalVT != MVT::i32)
+ return SDValue();
+
+ if (VT != MVT::i32)
+ Op = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Op);
+
+ SDValue FFBH = DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, Op);
+ if (VT != MVT::i32)
+ FFBH = DAG.getNode(ISD::TRUNCATE, SL, VT, FFBH);
+
+ return FFBH;
+}
+
+// The native instructions return -1 on 0 input. Optimize out a select that
+// produces -1 on 0.
+//
+// TODO: If zero is not undef, we could also do this if the output is compared
+// against the bitwidth.
+//
+// TODO: Should probably combine against FFBH_U32 instead of ctlz directly.
+SDValue AMDGPUTargetLowering::performCtlzCombine(SDLoc SL,
+ SDValue Cond,
+ SDValue LHS,
+ SDValue RHS,
+ DAGCombinerInfo &DCI) const {
+ ConstantSDNode *CmpRhs = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+ if (!CmpRhs || !CmpRhs->isNullValue())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ SDValue CmpLHS = Cond.getOperand(0);
+
+ // select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x
+ if (CCOpcode == ISD::SETEQ &&
+ isCtlzOpc(RHS.getOpcode()) &&
+ RHS.getOperand(0) == CmpLHS &&
+ isNegativeOne(LHS)) {
+ return getFFBH_U32(*this, DAG, SL, CmpLHS);
+ }
+
+ // select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x
+ if (CCOpcode == ISD::SETNE &&
+ isCtlzOpc(LHS.getOpcode()) &&
+ LHS.getOperand(0) == CmpLHS &&
+ isNegativeOne(RHS)) {
+ return getFFBH_U32(*this, DAG, SL, CmpLHS);
+ }
+
+ return SDValue();
+}
+
+SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SDValue Cond = N->getOperand(0);
+ if (Cond.getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDValue LHS = Cond.getOperand(0);
+ SDValue RHS = Cond.getOperand(1);
+ SDValue CC = Cond.getOperand(2);
+
+ SDValue True = N->getOperand(1);
+ SDValue False = N->getOperand(2);
+
+ if (VT == MVT::f32 && Cond.hasOneUse())
+ return CombineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
+
+ // There's no reason to not do this if the condition has other uses.
+ return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
+}
+
SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -2471,23 +2711,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
simplifyI24(N1, DCI);
return SDValue();
}
- case ISD::SELECT: {
- SDValue Cond = N->getOperand(0);
- if (Cond.getOpcode() == ISD::SETCC && Cond.hasOneUse()) {
- EVT VT = N->getValueType(0);
- SDValue LHS = Cond.getOperand(0);
- SDValue RHS = Cond.getOperand(1);
- SDValue CC = Cond.getOperand(2);
-
- SDValue True = N->getOperand(1);
- SDValue False = N->getOperand(2);
-
- if (VT == MVT::f32)
- return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
- }
-
- break;
- }
+ case ISD::SELECT:
+ return performSelectCombine(N, DCI);
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
assert(!N->getValueType(0).isVector() &&
@@ -2699,6 +2924,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BFE_I32)
NODE_NAME_CASE(BFI)
NODE_NAME_CASE(BFM)
+ NODE_NAME_CASE(FFBH_U32)
NODE_NAME_CASE(MUL_U24)
NODE_NAME_CASE(MUL_I24)
NODE_NAME_CASE(MAD_U24)
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 7314cc0..3792541 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -54,6 +54,9 @@ private:
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const;
SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
@@ -67,6 +70,9 @@ private:
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performCtlzCombine(SDLoc SL, SDValue Cond, SDValue LHS, SDValue RHS,
+ DAGCombinerInfo &DCI) const;
+ SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
protected:
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
@@ -109,6 +115,8 @@ protected:
SmallVectorImpl<ISD::InputArg> &OrigIns) const;
void AnalyzeFormalArguments(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const;
+ void AnalyzeReturn(CCState &State,
+ const SmallVectorImpl<ISD::OutputArg> &Outs) const;
public:
AMDGPUTargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI);
@@ -263,6 +271,7 @@ enum NodeType : unsigned {
BFE_I32, // Extract range of bits with sign extension to 32-bits.
BFI, // (src0 & src1) | (~src0 & src2)
BFM, // Insert a range of bits into a 32-bit word.
+ FFBH_U32, // ctlz with -1 if input is zero.
MUL_U24,
MUL_I24,
MAD_U24,
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 70e589c..575dfe4 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -191,6 +191,8 @@ def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
+def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>;
+
// Signed and unsigned 24-bit mulitply. The highest 8-bits are ignore when
// performing the mulitply. The result is a 32-bit value.
def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
@@ -240,4 +242,4 @@ def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChai
// Call/Return DAG Nodes
//===----------------------------------------------------------------------===//
def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 22f85b3..b1be619 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -66,8 +66,12 @@ static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
}
static MachineSchedRegistry
-SchedCustomRegistry("r600", "Run R600's custom scheduler",
- createR600MachineScheduler);
+R600SchedRegistry("r600", "Run R600's custom scheduler",
+ createR600MachineScheduler);
+
+static MachineSchedRegistry
+SISchedRegistry("si", "Run SI's custom scheduler",
+ createSIMachineScheduler);
static std::string computeDataLayout(const Triple &TT) {
std::string Ret = "e-p:32:32";
diff --git a/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
index 779a14e..2245f14 100644
--- a/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -349,7 +349,7 @@ def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>;
def ADDC_UINT : R600_2OP_Helper <0x52, "ADDC_UINT", AMDGPUcarry>;
def SUBB_UINT : R600_2OP_Helper <0x53, "SUBB_UINT", AMDGPUborrow>;
-def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", ctlz_zero_undef, VecALU>;
+def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", AMDGPUffbh_u32, VecALU>;
def FFBL_INT : R600_1OP_Helper <0xAC, "FFBL_INT", cttz_zero_undef, VecALU>;
let hasSideEffects = 1 in {
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 68b1d1a..4bc80a0 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -28,7 +28,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() {
//===--- Global Variable Emission Directives --------------------------===//
HasAggressiveSymbolFolding = true;
COMMDirectiveAlignmentIsInBytes = false;
- HasDotTypeDotSizeDirective = false;
HasNoDeadStrip = true;
WeakRefDirective = ".weakref\t";
//===--- Dwarf Emission Directives -----------------------------------===//
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
index 7f79dd3..aa1e352 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -137,7 +137,7 @@ namespace SIOutMods {
#define C_00B84C_EXCP_EN
#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
-
+#define R_0286D0_SPI_PS_INPUT_ADDR 0x0286D0
#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
#define S_00B848_VGPRS(x) (((x) & 0x3F) << 0)
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 96e37c5..f59d994 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -215,7 +215,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
unsigned SrcReg = MI.getOperand(I).getReg();
- unsigned SrcSubReg = MI.getOperand(I).getReg();
+ unsigned SrcSubReg = MI.getOperand(I).getSubReg();
const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
assert(TRI->isSGPRClass(SrcRC) &&
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 02a3930..6230d1e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -334,12 +334,20 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
!MRI.hasOneUse(MI.getOperand(0).getReg()))
continue;
- // FIXME: Fold operands with subregs.
if (OpToFold.isReg() &&
- (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
- OpToFold.getSubReg()))
+ !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()))
continue;
+ // Prevent folding operands backwards in the function. For example,
+ // the COPY opcode must not be replaced by 1 in this example:
+ //
+ // %vreg3<def> = COPY %VGPR0; VGPR_32:%vreg3
+ // ...
+ // %VGPR0<def> = V_MOV_B32_e32 1, %EXEC<imp-use>
+ MachineOperand &Dst = MI.getOperand(0);
+ if (Dst.isReg() &&
+ !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
+ continue;
// We need mutate the operands of new mov instructions to add implicit
// uses of EXEC, but adding them invalidates the use_iterator, so defer
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0e043cb..5448675 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -259,7 +259,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
setTargetDAGCombine(ISD::SMAX);
setTargetDAGCombine(ISD::UMIN);
setTargetDAGCombine(ISD::UMAX);
- setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
@@ -598,18 +597,20 @@ SDValue SITargetLowering::LowerFormalArguments(
// First check if it's a PS input addr
if (Info->getShaderType() == ShaderType::PIXEL && !Arg.Flags.isInReg() &&
- !Arg.Flags.isByVal()) {
+ !Arg.Flags.isByVal() && PSInputNum <= 15) {
- assert((PSInputNum <= 15) && "Too many PS inputs!");
-
- if (!Arg.Used) {
+ if (!Arg.Used && !Info->isPSInputAllocated(PSInputNum)) {
// We can safely skip PS inputs
Skipped.set(i);
++PSInputNum;
continue;
}
- Info->PSInputAddr |= 1 << PSInputNum++;
+ Info->markPSInputAllocated(PSInputNum);
+ if (Arg.Used)
+ Info->PSInputEna |= 1 << PSInputNum;
+
+ ++PSInputNum;
}
// Second split vertices into their elements
@@ -639,11 +640,25 @@ SDValue SITargetLowering::LowerFormalArguments(
*DAG.getContext());
// At least one interpolation mode must be enabled or else the GPU will hang.
+ //
+ // Check PSInputAddr instead of PSInputEna. The idea is that if the user set
+ // PSInputAddr, the user wants to enable some bits after the compilation
+ // based on run-time states. Since we can't know what the final PSInputEna
+ // will look like, so we shouldn't do anything here and the user should take
+ // responsibility for the correct programming.
+ //
+ // Otherwise, the following restrictions apply:
+ // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
+ // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
+ // enabled too.
if (Info->getShaderType() == ShaderType::PIXEL &&
- (Info->PSInputAddr & 0x7F) == 0) {
- Info->PSInputAddr |= 1;
+ ((Info->getPSInputAddr() & 0x7F) == 0 ||
+ ((Info->getPSInputAddr() & 0xF) == 0 &&
+ Info->isPSInputAllocated(11)))) {
CCInfo.AllocateReg(AMDGPU::VGPR0);
CCInfo.AllocateReg(AMDGPU::VGPR1);
+ Info->markPSInputAllocated(0);
+ Info->PSInputEna |= 1;
}
if (Info->getShaderType() == ShaderType::COMPUTE) {
@@ -872,6 +887,97 @@ SDValue SITargetLowering::LowerFormalArguments(
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
+SDValue SITargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDLoc DL, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+ if (Info->getShaderType() == ShaderType::COMPUTE)
+ return AMDGPUTargetLowering::LowerReturn(Chain, CallConv, isVarArg, Outs,
+ OutVals, DL, DAG);
+
+ Info->setIfReturnsVoid(Outs.size() == 0);
+
+ SmallVector<ISD::OutputArg, 48> Splits;
+ SmallVector<SDValue, 48> SplitVals;
+
+ // Split vectors into their elements.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ const ISD::OutputArg &Out = Outs[i];
+
+ if (Out.VT.isVector()) {
+ MVT VT = Out.VT.getVectorElementType();
+ ISD::OutputArg NewOut = Out;
+ NewOut.Flags.setSplit();
+ NewOut.VT = VT;
+
+ // We want the original number of vector elements here, e.g.
+ // three or five, not four or eight.
+ unsigned NumElements = Out.ArgVT.getVectorNumElements();
+
+ for (unsigned j = 0; j != NumElements; ++j) {
+ SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, OutVals[i],
+ DAG.getConstant(j, DL, MVT::i32));
+ SplitVals.push_back(Elem);
+ Splits.push_back(NewOut);
+ NewOut.PartOffset += NewOut.VT.getStoreSize();
+ }
+ } else {
+ SplitVals.push_back(OutVals[i]);
+ Splits.push_back(Out);
+ }
+ }
+
+ // CCValAssign - represent the assignment of the return value to a location.
+ SmallVector<CCValAssign, 48> RVLocs;
+
+ // CCState - Info about the registers and stack slots.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
+
+ // Analyze outgoing return values.
+ AnalyzeReturn(CCInfo, Splits);
+
+ SDValue Flag;
+ SmallVector<SDValue, 48> RetOps;
+ RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0, realRVLocIdx = 0;
+ i != RVLocs.size();
+ ++i, ++realRVLocIdx) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ SDValue Arg = SplitVals[realRVLocIdx];
+
+ // Copied from other backends.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
+ break;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ // Update chain and glue.
+ RetOps[0] = Chain;
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, RetOps);
+}
+
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const {
@@ -1158,6 +1264,13 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
switch (IntrinsicID) {
case Intrinsic::amdgcn_dispatch_ptr:
+ if (!Subtarget->isAmdHsaOS()) {
+ DiagnosticInfoUnsupported BadIntrin(*MF.getFunction(),
+ "hsa intrinsic without hsa target");
+ DAG.getContext()->diagnose(BadIntrin);
+ return DAG.getUNDEF(VT);
+ }
+
return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR), VT);
@@ -2027,7 +2140,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case ISD::UINT_TO_FP: {
return performUCharToFloatCombine(N, DCI);
-
+ }
case ISD::FADD: {
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
break;
@@ -2109,7 +2222,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
break;
}
- }
case ISD::LOAD:
case ISD::STORE:
case ISD::ATOMIC_LOAD:
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
index e2f8cb1..f01b2c0 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -95,6 +95,13 @@ public:
SDLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDLoc DL, SelectionDAG &DAG) const override;
+
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
MachineBasicBlock * BB) const override;
bool enableAggressiveFMAFusion(EVT VT) const override;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index 821aada..94e6147 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -84,6 +84,9 @@ private:
bool LastInstWritesM0;
+ /// \brief Whether the machine function returns void
+ bool ReturnsVoid;
+
/// \brief Get increment/decrement amount for this instruction.
Counters getHwCounts(MachineInstr &MI);
@@ -322,7 +325,9 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
const Counters &Required) {
// End of program? No need to wait on anything
- if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
+ // A function not returning void needs to wait, because other bytecode will
+ // be appended after it and we don't know what it will be.
+ if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM && ReturnsVoid)
return false;
// Figure out if the async instructions execute in order
@@ -465,6 +470,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
LastIssued = ZeroCounts;
LastOpcodeType = OTHER;
LastInstWritesM0 = false;
+ ReturnsVoid = MF.getInfo<SIMachineFunctionInfo>()->returnsVoid();
memset(&UsedRegs, 0, sizeof(UsedRegs));
memset(&DefinedRegs, 0, sizeof(DefinedRegs));
@@ -488,6 +494,14 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
// Wait for everything at the end of the MBB
Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
+
+ // Functions returning something shouldn't contain S_ENDPGM, because other
+ // bytecode will be appended after it.
+ if (!ReturnsVoid) {
+ MachineBasicBlock::iterator I = MBB.getFirstTerminator();
+ if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
+ I->eraseFromParent();
+ }
}
return Changes;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index a08a5a8..1e10d25 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1777,6 +1777,10 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
MRI.getRegClass(Reg) :
RI.getPhysRegClass(Reg);
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
+ RC = TRI->getSubRegClass(RC, MO.getSubReg());
+
// In order to be legal, the common sub-class must be equal to the
// class of the current operand. For example:
//
@@ -3075,3 +3079,15 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
return Rsrc23;
}
+
+bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr *MI) const {
+ unsigned Opc = MI->getOpcode();
+
+ return isSMRD(Opc);
+}
+
+bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr *MI) const {
+ unsigned Opc = MI->getOpcode();
+
+ return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 307ef67..cce1ae7 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -462,6 +462,9 @@ public:
uint64_t getDefaultRsrcDataFormat() const;
uint64_t getScratchRsrcWords23() const;
+
+ bool isLowLatencyInstruction(const MachineInstr *MI) const;
+ bool isHighLatencyInstruction(const MachineInstr *MI) const;
};
namespace AMDGPU {
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
index b7df058..89692ab 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -144,7 +144,7 @@ defm S_FF1_I32_B32 : SOP1_32 <sop1<0x13, 0x10>, "s_ff1_i32_b32",
defm S_FF1_I32_B64 : SOP1_32_64 <sop1<0x14, 0x11>, "s_ff1_i32_b64", []>;
defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32",
- [(set i32:$dst, (ctlz_zero_undef i32:$src0))]
+ [(set i32:$dst, (AMDGPUffbh_u32 i32:$src0))]
>;
defm S_FLBIT_I32_B64 : SOP1_32_64 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index bf15516..49677fc 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -46,8 +46,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
- LDSWaveSpillSize(0),
PSInputAddr(0),
+ ReturnsVoid(true),
+ LDSWaveSpillSize(0),
+ PSInputEna(0),
NumUserSGPRs(0),
NumSystemSGPRs(0),
HasSpilledSGPRs(false),
@@ -72,6 +74,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
const Function *F = MF.getFunction();
+ PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
+
const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
if (getShaderType() == ShaderType::COMPUTE)
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 9c528d6..846ee5d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -57,10 +57,14 @@ class SIMachineFunctionInfo : public AMDGPUMachineFunction {
unsigned WorkGroupInfoSystemSGPR;
unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
+ // Graphics info.
+ unsigned PSInputAddr;
+ bool ReturnsVoid;
+
public:
// FIXME: Make private
unsigned LDSWaveSpillSize;
- unsigned PSInputAddr;
+ unsigned PSInputEna;
std::map<unsigned, unsigned> LaneVGPRs;
unsigned ScratchOffsetReg;
unsigned NumUserSGPRs;
@@ -273,6 +277,26 @@ public:
HasSpilledVGPRs = Spill;
}
+ unsigned getPSInputAddr() const {
+ return PSInputAddr;
+ }
+
+ bool isPSInputAllocated(unsigned Index) const {
+ return PSInputAddr & (1 << Index);
+ }
+
+ void markPSInputAllocated(unsigned Index) {
+ PSInputAddr |= 1 << Index;
+ }
+
+ bool returnsVoid() const {
+ return ReturnsVoid;
+ }
+
+ void setIfReturnsVoid(bool Value) {
+ ReturnsVoid = Value;
+ }
+
unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
};
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
new file mode 100644
index 0000000..1cfa984
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -0,0 +1,1968 @@
+//===-- SIMachineScheduler.cpp - SI Scheduler Interface -*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI Machine Scheduler interface
+//
+//===----------------------------------------------------------------------===//
+
+#include "SIMachineScheduler.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "misched"
+
+// This scheduler implements a different scheduling algorithm than
+// GenericScheduler.
+//
+// There are several specific architecture behaviours that can't be modelled
+// for GenericScheduler:
+// . When accessing the result of an SGPR load instruction, you have to wait
+// for all the SGPR load instructions before your current instruction to
+// have finished.
+// . When accessing the result of an VGPR load instruction, you have to wait
+// for all the VGPR load instructions previous to the VGPR load instruction
+// you are interested in to finish.
+// . The less the register pressure, the best load latencies are hidden
+//
+// Moreover some specifities (like the fact a lot of instructions in the shader
+// have few dependencies) makes the generic scheduler have some unpredictable
+// behaviours. For example when register pressure becomes high, it can either
+// manage to prevent register pressure from going too high, or it can
+// increase register pressure even more than if it hadn't taken register
+// pressure into account.
+//
+// Also some other bad behaviours are generated, like loading at the beginning
+// of the shader a constant in VGPR you won't need until the end of the shader.
+//
+// The scheduling problem for SI can distinguish three main parts:
+// . Hiding high latencies (texture sampling, etc)
+// . Hiding low latencies (SGPR constant loading, etc)
+// . Keeping register usage low for better latency hiding and general
+// performance
+//
+// Some other things can also affect performance, but are hard to predict
+// (cache usage, the fact the HW can issue several instructions from different
+// wavefronts if different types, etc)
+//
+// This scheduler tries to solve the scheduling problem by dividing it into
+// simpler sub-problems. It divides the instructions into blocks, schedules
+// locally inside the blocks where it takes care of low latencies, and then
+// chooses the order of the blocks by taking care of high latencies.
+// Dividing the instructions into blocks helps control keeping register
+// usage low.
+//
+// First the instructions are put into blocks.
+// We want the blocks help control register usage and hide high latencies
+// later. To help control register usage, we typically want all local
+// computations, when for example you create a result that can be comsummed
+// right away, to be contained in a block. Block inputs and outputs would
+// typically be important results that are needed in several locations of
+// the shader. Since we do want blocks to help hide high latencies, we want
+// the instructions inside the block to have a minimal set of dependencies
+// on high latencies. It will make it easy to pick blocks to hide specific
+// high latencies.
+// The block creation algorithm is divided into several steps, and several
+// variants can be tried during the scheduling process.
+//
+// Second the order of the instructions inside the blocks is choosen.
+// At that step we do take into account only register usage and hiding
+// low latency instructions
+//
+// Third the block order is choosen, there we try to hide high latencies
+// and keep register usage low.
+//
+// After the third step, a pass is done to improve the hiding of low
+// latencies.
+//
+// Actually when talking about 'low latency' or 'high latency' it includes
+// both the latency to get the cache (or global mem) data go to the register,
+// and the bandwith limitations.
+// Increasing the number of active wavefronts helps hide the former, but it
+// doesn't solve the latter, thus why even if wavefront count is high, we have
+// to try have as many instructions hiding high latencies as possible.
+// The OpenCL doc says for example latency of 400 cycles for a global mem access,
+// which is hidden by 10 instructions if the wavefront count is 10.
+
+// Some figures taken from AMD docs:
+// Both texture and constant L1 caches are 4-way associative with 64 bytes
+// lines.
+// Constant cache is shared with 4 CUs.
+// For texture sampling, the address generation unit receives 4 texture
+// addresses per cycle, thus we could expect texture sampling latency to be
+// equivalent to 4 instructions in the very best case (a VGPR is 64 work items,
+// instructions in a wavefront group are executed every 4 cycles),
+// or 16 instructions if the other wavefronts associated to the 3 other VALUs
+// of the CU do texture sampling too. (Don't take these figures too seriously,
+// as I'm not 100% sure of the computation)
+// Data exports should get similar latency.
+// For constant loading, the cache is shader with 4 CUs.
+// The doc says "a throughput of 16B/cycle for each of the 4 Compute Unit"
+// I guess if the other CU don't read the cache, it can go up to 64B/cycle.
+// It means a simple s_buffer_load should take one instruction to hide, as
+// well as a s_buffer_loadx2 and potentially a s_buffer_loadx8 if on the same
+// cache line.
+//
+// As of today the driver doesn't preload the constants in cache, thus the
+// first loads get extra latency. The doc says global memory access can be
+// 300-600 cycles. We do not specially take that into account when scheduling
+// As we expect the driver to be able to preload the constants soon.
+
+
+// common code //
+
+#ifndef NDEBUG
+
+static const char *getReasonStr(SIScheduleCandReason Reason) {
+ switch (Reason) {
+ case NoCand: return "NOCAND";
+ case RegUsage: return "REGUSAGE";
+ case Latency: return "LATENCY";
+ case Successor: return "SUCCESSOR";
+ case Depth: return "DEPTH";
+ case NodeOrder: return "ORDER";
+ }
+ llvm_unreachable("Unknown reason!");
+}
+
+#endif
+
+static bool tryLess(int TryVal, int CandVal,
+ SISchedulerCandidate &TryCand,
+ SISchedulerCandidate &Cand,
+ SIScheduleCandReason Reason) {
+ if (TryVal < CandVal) {
+ TryCand.Reason = Reason;
+ return true;
+ }
+ if (TryVal > CandVal) {
+ if (Cand.Reason > Reason)
+ Cand.Reason = Reason;
+ return true;
+ }
+ Cand.setRepeat(Reason);
+ return false;
+}
+
+static bool tryGreater(int TryVal, int CandVal,
+ SISchedulerCandidate &TryCand,
+ SISchedulerCandidate &Cand,
+ SIScheduleCandReason Reason) {
+ if (TryVal > CandVal) {
+ TryCand.Reason = Reason;
+ return true;
+ }
+ if (TryVal < CandVal) {
+ if (Cand.Reason > Reason)
+ Cand.Reason = Reason;
+ return true;
+ }
+ Cand.setRepeat(Reason);
+ return false;
+}
+
+// SIScheduleBlock //
+
+void SIScheduleBlock::addUnit(SUnit *SU) {
+ NodeNum2Index[SU->NodeNum] = SUnits.size();
+ SUnits.push_back(SU);
+}
+
+#ifndef NDEBUG
+
+void SIScheduleBlock::traceCandidate(const SISchedCandidate &Cand) {
+
+ dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
+ dbgs() << '\n';
+}
+#endif
+
+void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
+ SISchedCandidate &TryCand) {
+ // Initialize the candidate if needed.
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
+ return;
+ }
+
+ if (Cand.SGPRUsage > 60 &&
+ tryLess(TryCand.SGPRUsage, Cand.SGPRUsage, TryCand, Cand, RegUsage))
+ return;
+
+ // Schedule low latency instructions as top as possible.
+ // Order of priority is:
+ // . Low latency instructions which do not depend on other low latency
+ // instructions we haven't waited for
+ // . Other instructions which do not depend on low latency instructions
+ // we haven't waited for
+ // . Low latencies
+ // . All other instructions
+ // Goal is to get: low latency instructions - independant instructions
+ // - (eventually some more low latency instructions)
+ // - instructions that depend on the first low latency instructions.
+ // If in the block there is a lot of constant loads, the SGPR usage
+ // could go quite high, thus above the arbitrary limit of 60 will encourage
+ // use the already loaded constants (in order to release some SGPRs) before
+ // loading more.
+ if (tryLess(TryCand.HasLowLatencyNonWaitedParent,
+ Cand.HasLowLatencyNonWaitedParent,
+ TryCand, Cand, SIScheduleCandReason::Depth))
+ return;
+
+ if (tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency,
+ TryCand, Cand, SIScheduleCandReason::Depth))
+ return;
+
+ if (TryCand.IsLowLatency &&
+ tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset,
+ TryCand, Cand, SIScheduleCandReason::Depth))
+ return;
+
+ if (tryLess(TryCand.VGPRUsage, Cand.VGPRUsage, TryCand, Cand, RegUsage))
+ return;
+
+ // Fall through to original instruction order.
+ if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
+ TryCand.Reason = NodeOrder;
+ }
+}
+
+SUnit* SIScheduleBlock::pickNode() {
+ SISchedCandidate TopCand;
+
+ for (SUnit* SU : TopReadySUs) {
+ SISchedCandidate TryCand;
+ std::vector<unsigned> pressure;
+ std::vector<unsigned> MaxPressure;
+ // Predict register usage after this instruction.
+ TryCand.SU = SU;
+ TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure);
+ TryCand.SGPRUsage = pressure[DAG->getSGPRSetID()];
+ TryCand.VGPRUsage = pressure[DAG->getVGPRSetID()];
+ TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum];
+ TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum];
+ TryCand.HasLowLatencyNonWaitedParent =
+ HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
+ tryCandidateTopDown(TopCand, TryCand);
+ if (TryCand.Reason != NoCand)
+ TopCand.setBest(TryCand);
+ }
+
+ return TopCand.SU;
+}
+
+
+// Schedule something valid.
+void SIScheduleBlock::fastSchedule() {
+ TopReadySUs.clear();
+ if (Scheduled)
+ undoSchedule();
+
+ for (SUnit* SU : SUnits) {
+ if (!SU->NumPredsLeft)
+ TopReadySUs.push_back(SU);
+ }
+
+ while (!TopReadySUs.empty()) {
+ SUnit *SU = TopReadySUs[0];
+ ScheduledSUnits.push_back(SU);
+ nodeScheduled(SU);
+ }
+
+ Scheduled = true;
+}
+
+// Returns if the register was set between first and last.
+static bool isDefBetween(unsigned Reg,
+ SlotIndex First, SlotIndex Last,
+ const MachineRegisterInfo *MRI,
+ const LiveIntervals *LIS) {
+ for (MachineRegisterInfo::def_instr_iterator
+ UI = MRI->def_instr_begin(Reg),
+ UE = MRI->def_instr_end(); UI != UE; ++UI) {
+ const MachineInstr* MI = &*UI;
+ if (MI->isDebugValue())
+ continue;
+ SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot();
+ if (InstSlot >= First && InstSlot <= Last)
+ return true;
+ }
+ return false;
+}
+
+void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
+ MachineBasicBlock::iterator EndBlock) {
+ IntervalPressure Pressure, BotPressure;
+ RegPressureTracker RPTracker(Pressure), BotRPTracker(BotPressure);
+ LiveIntervals *LIS = DAG->getLIS();
+ MachineRegisterInfo *MRI = DAG->getMRI();
+ DAG->initRPTracker(TopRPTracker);
+ DAG->initRPTracker(BotRPTracker);
+ DAG->initRPTracker(RPTracker);
+
+ // Goes though all SU. RPTracker captures what had to be alive for the SUs
+ // to execute, and what is still alive at the end.
+ for (SUnit* SU : ScheduledSUnits) {
+ RPTracker.setPos(SU->getInstr());
+ RPTracker.advance();
+ }
+
+ // Close the RPTracker to finalize live ins/outs.
+ RPTracker.closeRegion();
+
+ // Initialize the live ins and live outs.
+ TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
+ BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
+
+ // Do not Track Physical Registers, because it messes up.
+ for (unsigned Reg : RPTracker.getPressure().LiveInRegs) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ LiveInRegs.insert(Reg);
+ }
+ LiveOutRegs.clear();
+ // There is several possibilities to distinguish:
+ // 1) Reg is not input to any instruction in the block, but is output of one
+ // 2) 1) + read in the block and not needed after it
+ // 3) 1) + read in the block but needed in another block
+ // 4) Reg is input of an instruction but another block will read it too
+ // 5) Reg is input of an instruction and then rewritten in the block.
+ // result is not read in the block (implies used in another block)
+ // 6) Reg is input of an instruction and then rewritten in the block.
+ // result is read in the block and not needed in another block
+ // 7) Reg is input of an instruction and then rewritten in the block.
+ // result is read in the block but also needed in another block
+ // LiveInRegs will contains all the regs in situation 4, 5, 6, 7
+ // We want LiveOutRegs to contain only Regs whose content will be read after
+ // in another block, and whose content was written in the current block,
+ // that is we want it to get 1, 3, 5, 7
+ // Since we made the MIs of a block to be packed all together before
+ // scheduling, then the LiveIntervals were correct, and the RPTracker was
+ // able to correctly handle 5 vs 6, 2 vs 3.
+ // (Note: This is not sufficient for RPTracker to not do mistakes for case 4)
+ // The RPTracker's LiveOutRegs has 1, 3, (some correct or incorrect)4, 5, 7
+ // Comparing to LiveInRegs is not sufficient to differenciate 4 vs 5, 7
+ // The use of findDefBetween removes the case 4.
+ for (unsigned Reg : RPTracker.getPressure().LiveOutRegs) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ isDefBetween(Reg, LIS->getInstructionIndex(BeginBlock).getRegSlot(),
+ LIS->getInstructionIndex(EndBlock).getRegSlot(),
+ MRI, LIS)) {
+ LiveOutRegs.insert(Reg);
+ }
+ }
+
+ // Pressure = sum_alive_registers register size
+ // Internally llvm will represent some registers as big 128 bits registers
+ // for example, but they actually correspond to 4 actual 32 bits registers.
+ // Thus Pressure is not equal to num_alive_registers * constant.
+ LiveInPressure = TopPressure.MaxSetPressure;
+ LiveOutPressure = BotPressure.MaxSetPressure;
+
+ // Prepares TopRPTracker for top down scheduling.
+ TopRPTracker.closeTop();
+}
+
+void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
+ MachineBasicBlock::iterator EndBlock) {
+ if (!Scheduled)
+ fastSchedule();
+
+ // PreScheduling phase to set LiveIn and LiveOut.
+ initRegPressure(BeginBlock, EndBlock);
+ undoSchedule();
+
+ // Schedule for real now.
+
+ TopReadySUs.clear();
+
+ for (SUnit* SU : SUnits) {
+ if (!SU->NumPredsLeft)
+ TopReadySUs.push_back(SU);
+ }
+
+ while (!TopReadySUs.empty()) {
+ SUnit *SU = pickNode();
+ ScheduledSUnits.push_back(SU);
+ TopRPTracker.setPos(SU->getInstr());
+ TopRPTracker.advance();
+ nodeScheduled(SU);
+ }
+
+ // TODO: compute InternalAdditionnalPressure.
+ InternalAdditionnalPressure.resize(TopPressure.MaxSetPressure.size());
+
+ // Check everything is right.
+#ifndef NDEBUG
+ assert(SUnits.size() == ScheduledSUnits.size() &&
+ TopReadySUs.empty());
+ for (SUnit* SU : SUnits) {
+ assert(SU->isScheduled &&
+ SU->NumPredsLeft == 0);
+ }
+#endif
+
+ Scheduled = true;
+}
+
+void SIScheduleBlock::undoSchedule() {
+ for (SUnit* SU : SUnits) {
+ SU->isScheduled = false;
+ for (SDep& Succ : SU->Succs) {
+ if (BC->isSUInBlock(Succ.getSUnit(), ID))
+ undoReleaseSucc(SU, &Succ);
+ }
+ }
+ HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
+ ScheduledSUnits.clear();
+ Scheduled = false;
+}
+
+void SIScheduleBlock::undoReleaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+ if (SuccEdge->isWeak()) {
+ ++SuccSU->WeakPredsLeft;
+ return;
+ }
+ ++SuccSU->NumPredsLeft;
+}
+
+void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+ if (SuccEdge->isWeak()) {
+ --SuccSU->WeakPredsLeft;
+ return;
+ }
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(DAG);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(nullptr);
+ }
+#endif
+
+ --SuccSU->NumPredsLeft;
+}
+
+/// Release Successors of the SU that are in the block or not.
+void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {
+ for (SDep& Succ : SU->Succs) {
+ SUnit *SuccSU = Succ.getSUnit();
+
+ if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock)
+ continue;
+
+ releaseSucc(SU, &Succ);
+ if (SuccSU->NumPredsLeft == 0 && InOrOutBlock)
+ TopReadySUs.push_back(SuccSU);
+ }
+}
+
+void SIScheduleBlock::nodeScheduled(SUnit *SU) {
+ // Is in TopReadySUs
+ assert (!SU->NumPredsLeft);
+ std::vector<SUnit*>::iterator I =
+ std::find(TopReadySUs.begin(), TopReadySUs.end(), SU);
+ if (I == TopReadySUs.end()) {
+ dbgs() << "Data Structure Bug in SI Scheduler\n";
+ llvm_unreachable(nullptr);
+ }
+ TopReadySUs.erase(I);
+
+ releaseSuccessors(SU, true);
+ // Scheduling this node will trigger a wait,
+ // thus propagate to other instructions that they do not need to wait either.
+ if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]])
+ HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
+
+ if (DAG->IsLowLatencySU[SU->NodeNum]) {
+ for (SDep& Succ : SU->Succs) {
+ std::map<unsigned, unsigned>::iterator I =
+ NodeNum2Index.find(Succ.getSUnit()->NodeNum);
+ if (I != NodeNum2Index.end())
+ HasLowLatencyNonWaitedParent[I->second] = 1;
+ }
+ }
+ SU->isScheduled = true;
+}
+
+void SIScheduleBlock::finalizeUnits() {
+ // We remove links from outside blocks to enable scheduling inside the block.
+ for (SUnit* SU : SUnits) {
+ releaseSuccessors(SU, false);
+ if (DAG->IsHighLatencySU[SU->NodeNum])
+ HighLatencyBlock = true;
+ }
+ HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
+}
+
+// we maintain ascending order of IDs
+void SIScheduleBlock::addPred(SIScheduleBlock *Pred) {
+ unsigned PredID = Pred->getID();
+
+ // Check if not already predecessor.
+ for (SIScheduleBlock* P : Preds) {
+ if (PredID == P->getID())
+ return;
+ }
+ Preds.push_back(Pred);
+
+#ifndef NDEBUG
+ for (SIScheduleBlock* S : Succs) {
+ if (PredID == S->getID())
+ assert(!"Loop in the Block Graph!\n");
+ }
+#endif
+}
+
+void SIScheduleBlock::addSucc(SIScheduleBlock *Succ) {
+ unsigned SuccID = Succ->getID();
+
+ // Check if not already predecessor.
+ for (SIScheduleBlock* S : Succs) {
+ if (SuccID == S->getID())
+ return;
+ }
+ if (Succ->isHighLatencyBlock())
+ ++NumHighLatencySuccessors;
+ Succs.push_back(Succ);
+#ifndef NDEBUG
+ for (SIScheduleBlock* P : Preds) {
+ if (SuccID == P->getID())
+ assert("Loop in the Block Graph!\n");
+ }
+#endif
+}
+
+#ifndef NDEBUG
+void SIScheduleBlock::printDebug(bool full) {
+ dbgs() << "Block (" << ID << ")\n";
+ if (!full)
+ return;
+
+ dbgs() << "\nContains High Latency Instruction: "
+ << HighLatencyBlock << '\n';
+ dbgs() << "\nDepends On:\n";
+ for (SIScheduleBlock* P : Preds) {
+ P->printDebug(false);
+ }
+
+ dbgs() << "\nSuccessors:\n";
+ for (SIScheduleBlock* S : Succs) {
+ S->printDebug(false);
+ }
+
+ if (Scheduled) {
+ dbgs() << "LiveInPressure " << LiveInPressure[DAG->getSGPRSetID()] << ' '
+ << LiveInPressure[DAG->getVGPRSetID()] << '\n';
+ dbgs() << "LiveOutPressure " << LiveOutPressure[DAG->getSGPRSetID()] << ' '
+ << LiveOutPressure[DAG->getVGPRSetID()] << "\n\n";
+ dbgs() << "LiveIns:\n";
+ for (unsigned Reg : LiveInRegs)
+ dbgs() << PrintVRegOrUnit(Reg, DAG->getTRI()) << ' ';
+
+ dbgs() << "\nLiveOuts:\n";
+ for (unsigned Reg : LiveOutRegs)
+ dbgs() << PrintVRegOrUnit(Reg, DAG->getTRI()) << ' ';
+ }
+
+ dbgs() << "\nInstructions:\n";
+ if (!Scheduled) {
+ for (SUnit* SU : SUnits) {
+ SU->dump(DAG);
+ }
+ } else {
+ for (SUnit* SU : SUnits) {
+ SU->dump(DAG);
+ }
+ }
+
+ dbgs() << "///////////////////////\n";
+}
+
+#endif
+
+// SIScheduleBlockCreator //
+
+SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG) :
+DAG(DAG) {
+}
+
+SIScheduleBlockCreator::~SIScheduleBlockCreator() {
+}
+
+SIScheduleBlocks
+SIScheduleBlockCreator::getBlocks(SISchedulerBlockCreatorVariant BlockVariant) {
+ std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator B =
+ Blocks.find(BlockVariant);
+ if (B == Blocks.end()) {
+ SIScheduleBlocks Res;
+ createBlocksForVariant(BlockVariant);
+ topologicalSort();
+ scheduleInsideBlocks();
+ fillStats();
+ Res.Blocks = CurrentBlocks;
+ Res.TopDownIndex2Block = TopDownIndex2Block;
+ Res.TopDownBlock2Index = TopDownBlock2Index;
+ Blocks[BlockVariant] = Res;
+ return Res;
+ } else {
+ return B->second;
+ }
+}
+
+bool SIScheduleBlockCreator::isSUInBlock(SUnit *SU, unsigned ID) {
+ if (SU->NodeNum >= DAG->SUnits.size())
+ return false;
+ return CurrentBlocks[Node2CurrentBlock[SU->NodeNum]]->getID() == ID;
+}
+
+void SIScheduleBlockCreator::colorHighLatenciesAlone() {
+ unsigned DAGSize = DAG->SUnits.size();
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[i];
+ if (DAG->IsHighLatencySU[SU->NodeNum]) {
+ CurrentColoring[SU->NodeNum] = NextReservedID++;
+ }
+ }
+}
+
+void SIScheduleBlockCreator::colorHighLatenciesGroups() {
+ unsigned DAGSize = DAG->SUnits.size();
+ unsigned NumHighLatencies = 0;
+ unsigned GroupSize;
+ unsigned Color = NextReservedID;
+ unsigned Count = 0;
+ std::set<unsigned> FormingGroup;
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[i];
+ if (DAG->IsHighLatencySU[SU->NodeNum])
+ ++NumHighLatencies;
+ }
+
+ if (NumHighLatencies == 0)
+ return;
+
+ if (NumHighLatencies <= 6)
+ GroupSize = 2;
+ else if (NumHighLatencies <= 12)
+ GroupSize = 3;
+ else
+ GroupSize = 4;
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[i];
+ if (DAG->IsHighLatencySU[SU->NodeNum]) {
+ unsigned CompatibleGroup = true;
+ unsigned ProposedColor = Color;
+ for (unsigned j : FormingGroup) {
+ // TODO: Currently CompatibleGroup will always be false,
+ // because the graph enforces the load order. This
+ // can be fixed, but as keeping the load order is often
+ // good for performance that causes a performance hit (both
+ // the default scheduler and this scheduler).
+ // When this scheduler determines a good load order,
+ // this can be fixed.
+ if (!DAG->canAddEdge(SU, &DAG->SUnits[j]) ||
+ !DAG->canAddEdge(&DAG->SUnits[j], SU))
+ CompatibleGroup = false;
+ }
+ if (!CompatibleGroup || ++Count == GroupSize) {
+ FormingGroup.clear();
+ Color = ++NextReservedID;
+ if (!CompatibleGroup) {
+ ProposedColor = Color;
+ FormingGroup.insert(SU->NodeNum);
+ }
+ Count = 0;
+ } else {
+ FormingGroup.insert(SU->NodeNum);
+ }
+ CurrentColoring[SU->NodeNum] = ProposedColor;
+ }
+ }
+}
+
+void SIScheduleBlockCreator::colorComputeReservedDependencies() {
+ unsigned DAGSize = DAG->SUnits.size();
+ std::map<std::set<unsigned>, unsigned> ColorCombinations;
+
+ CurrentTopDownReservedDependencyColoring.clear();
+ CurrentBottomUpReservedDependencyColoring.clear();
+
+ CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
+ CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
+
+ // Traverse TopDown, and give different colors to SUs depending
+ // on which combination of High Latencies they depend on.
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[DAG->TopDownIndex2SU[i]];
+ std::set<unsigned> SUColors;
+
+ // Already given.
+ if (CurrentColoring[SU->NodeNum]) {
+ CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
+ CurrentColoring[SU->NodeNum];
+ continue;
+ }
+
+ for (SDep& PredDep : SU->Preds) {
+ SUnit *Pred = PredDep.getSUnit();
+ if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
+ continue;
+ if (CurrentTopDownReservedDependencyColoring[Pred->NodeNum] > 0)
+ SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->NodeNum]);
+ }
+ // Color 0 by default.
+ if (SUColors.empty())
+ continue;
+ // Same color than parents.
+ if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
+ CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
+ *SUColors.begin();
+ else {
+ std::map<std::set<unsigned>, unsigned>::iterator Pos =
+ ColorCombinations.find(SUColors);
+ if (Pos != ColorCombinations.end()) {
+ CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;
+ } else {
+ CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
+ NextNonReservedID;
+ ColorCombinations[SUColors] = NextNonReservedID++;
+ }
+ }
+ }
+
+ ColorCombinations.clear();
+
+ // Same as before, but BottomUp.
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ std::set<unsigned> SUColors;
+
+ // Already given.
+ if (CurrentColoring[SU->NodeNum]) {
+ CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
+ CurrentColoring[SU->NodeNum];
+ continue;
+ }
+
+ for (SDep& SuccDep : SU->Succs) {
+ SUnit *Succ = SuccDep.getSUnit();
+ if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
+ continue;
+ if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0)
+ SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->NodeNum]);
+ }
+ // Keep color 0.
+ if (SUColors.empty())
+ continue;
+ // Same color than parents.
+ if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
+ CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
+ *SUColors.begin();
+ else {
+ std::map<std::set<unsigned>, unsigned>::iterator Pos =
+ ColorCombinations.find(SUColors);
+ if (Pos != ColorCombinations.end()) {
+ CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = Pos->second;
+ } else {
+ CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
+ NextNonReservedID;
+ ColorCombinations[SUColors] = NextNonReservedID++;
+ }
+ }
+ }
+}
+
+void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
+ unsigned DAGSize = DAG->SUnits.size();
+ std::map<std::pair<unsigned, unsigned>, unsigned> ColorCombinations;
+
+ // Every combination of colors given by the top down
+ // and bottom up Reserved node dependency
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[i];
+ std::pair<unsigned, unsigned> SUColors;
+
+ // High latency instructions: already given.
+ if (CurrentColoring[SU->NodeNum])
+ continue;
+
+ SUColors.first = CurrentTopDownReservedDependencyColoring[SU->NodeNum];
+ SUColors.second = CurrentBottomUpReservedDependencyColoring[SU->NodeNum];
+
+ std::map<std::pair<unsigned, unsigned>, unsigned>::iterator Pos =
+ ColorCombinations.find(SUColors);
+ if (Pos != ColorCombinations.end()) {
+ CurrentColoring[SU->NodeNum] = Pos->second;
+ } else {
+ CurrentColoring[SU->NodeNum] = NextNonReservedID;
+ ColorCombinations[SUColors] = NextNonReservedID++;
+ }
+ }
+}
+
+void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
+ unsigned DAGSize = DAG->SUnits.size();
+ std::vector<int> PendingColoring = CurrentColoring;
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ std::set<unsigned> SUColors;
+ std::set<unsigned> SUColorsPending;
+
+ if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
+ continue;
+
+ if (CurrentBottomUpReservedDependencyColoring[SU->NodeNum] > 0 ||
+ CurrentTopDownReservedDependencyColoring[SU->NodeNum] > 0)
+ continue;
+
+ for (SDep& SuccDep : SU->Succs) {
+ SUnit *Succ = SuccDep.getSUnit();
+ if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
+ continue;
+ if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0 ||
+ CurrentTopDownReservedDependencyColoring[Succ->NodeNum] > 0)
+ SUColors.insert(CurrentColoring[Succ->NodeNum]);
+ SUColorsPending.insert(PendingColoring[Succ->NodeNum]);
+ }
+ if (SUColors.size() == 1 && SUColorsPending.size() == 1)
+ PendingColoring[SU->NodeNum] = *SUColors.begin();
+ else // TODO: Attribute new colors depending on color
+ // combination of children.
+ PendingColoring[SU->NodeNum] = NextNonReservedID++;
+ }
+ CurrentColoring = PendingColoring;
+}
+
+
+void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
+ unsigned DAGSize = DAG->SUnits.size();
+ unsigned PreviousColor;
+ std::set<unsigned> SeenColors;
+
+ if (DAGSize <= 1)
+ return;
+
+ PreviousColor = CurrentColoring[0];
+
+ for (unsigned i = 1, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[i];
+ unsigned CurrentColor = CurrentColoring[i];
+ unsigned PreviousColorSave = PreviousColor;
+ assert(i == SU->NodeNum);
+
+ if (CurrentColor != PreviousColor)
+ SeenColors.insert(PreviousColor);
+ PreviousColor = CurrentColor;
+
+ if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
+ continue;
+
+ if (SeenColors.find(CurrentColor) == SeenColors.end())
+ continue;
+
+ if (PreviousColorSave != CurrentColor)
+ CurrentColoring[i] = NextNonReservedID++;
+ else
+ CurrentColoring[i] = CurrentColoring[i-1];
+ }
+}
+
+void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
+ unsigned DAGSize = DAG->SUnits.size();
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ std::set<unsigned> SUColors;
+
+ if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
+ continue;
+
+ // No predecessor: Vgpr constant loading.
+ // Low latency instructions usually have a predecessor (the address)
+ if (SU->Preds.size() > 0 && !DAG->IsLowLatencySU[SU->NodeNum])
+ continue;
+
+ for (SDep& SuccDep : SU->Succs) {
+ SUnit *Succ = SuccDep.getSUnit();
+ if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
+ continue;
+ SUColors.insert(CurrentColoring[Succ->NodeNum]);
+ }
+ if (SUColors.size() == 1)
+ CurrentColoring[SU->NodeNum] = *SUColors.begin();
+ }
+}
+
+void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
+ unsigned DAGSize = DAG->SUnits.size();
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ std::set<unsigned> SUColors;
+
+ if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
+ continue;
+
+ for (SDep& SuccDep : SU->Succs) {
+ SUnit *Succ = SuccDep.getSUnit();
+ if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
+ continue;
+ SUColors.insert(CurrentColoring[Succ->NodeNum]);
+ }
+ if (SUColors.size() == 1)
+ CurrentColoring[SU->NodeNum] = *SUColors.begin();
+ }
+}
+
+void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
+ unsigned DAGSize = DAG->SUnits.size();
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ std::set<unsigned> SUColors;
+
+ if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
+ continue;
+
+ for (SDep& SuccDep : SU->Succs) {
+ SUnit *Succ = SuccDep.getSUnit();
+ if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
+ continue;
+ SUColors.insert(CurrentColoring[Succ->NodeNum]);
+ }
+ if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
+ CurrentColoring[SU->NodeNum] = *SUColors.begin();
+ }
+}
+
+void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
+ unsigned DAGSize = DAG->SUnits.size();
+ std::map<unsigned, unsigned> ColorCount;
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ unsigned color = CurrentColoring[SU->NodeNum];
+ std::map<unsigned, unsigned>::iterator Pos = ColorCount.find(color);
+ if (Pos != ColorCount.end()) {
+ ++ColorCount[color];
+ } else {
+ ColorCount[color] = 1;
+ }
+ }
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ unsigned color = CurrentColoring[SU->NodeNum];
+ std::set<unsigned> SUColors;
+
+ if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
+ continue;
+
+ if (ColorCount[color] > 1)
+ continue;
+
+ for (SDep& SuccDep : SU->Succs) {
+ SUnit *Succ = SuccDep.getSUnit();
+ if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
+ continue;
+ SUColors.insert(CurrentColoring[Succ->NodeNum]);
+ }
+ if (SUColors.size() == 1 && *SUColors.begin() != color) {
+ --ColorCount[color];
+ CurrentColoring[SU->NodeNum] = *SUColors.begin();
+ ++ColorCount[*SUColors.begin()];
+ }
+ }
+}
+
+void SIScheduleBlockCreator::cutHugeBlocks() {
+ // TODO
+}
+
+void SIScheduleBlockCreator::regroupNoUserInstructions() {
+ unsigned DAGSize = DAG->SUnits.size();
+ int GroupID = NextNonReservedID++;
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ bool hasSuccessor = false;
+
+ if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
+ continue;
+
+ for (SDep& SuccDep : SU->Succs) {
+ SUnit *Succ = SuccDep.getSUnit();
+ if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
+ continue;
+ hasSuccessor = true;
+ }
+ if (!hasSuccessor)
+ CurrentColoring[SU->NodeNum] = GroupID;
+ }
+}
+
+void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant) {
+ unsigned DAGSize = DAG->SUnits.size();
+ std::map<unsigned,unsigned> RealID;
+
+ CurrentBlocks.clear();
+ CurrentColoring.clear();
+ CurrentColoring.resize(DAGSize, 0);
+ Node2CurrentBlock.clear();
+
+ // Restore links previous scheduling variant has overridden.
+ DAG->restoreSULinksLeft();
+
+ NextReservedID = 1;
+ NextNonReservedID = DAGSize + 1;
+
+ DEBUG(dbgs() << "Coloring the graph\n");
+
+ if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesGrouped)
+ colorHighLatenciesGroups();
+ else
+ colorHighLatenciesAlone();
+ colorComputeReservedDependencies();
+ colorAccordingToReservedDependencies();
+ colorEndsAccordingToDependencies();
+ if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesAlonePlusConsecutive)
+ colorForceConsecutiveOrderInGroup();
+ regroupNoUserInstructions();
+ colorMergeConstantLoadsNextGroup();
+ colorMergeIfPossibleNextGroupOnlyForReserved();
+
+ // Put SUs of same color into same block
+ Node2CurrentBlock.resize(DAGSize, -1);
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[i];
+ unsigned Color = CurrentColoring[SU->NodeNum];
+ if (RealID.find(Color) == RealID.end()) {
+ int ID = CurrentBlocks.size();
+ BlockPtrs.push_back(
+ make_unique<SIScheduleBlock>(DAG, this, ID));
+ CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
+ RealID[Color] = ID;
+ }
+ CurrentBlocks[RealID[Color]]->addUnit(SU);
+ Node2CurrentBlock[SU->NodeNum] = RealID[Color];
+ }
+
+ // Build dependencies between blocks.
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[i];
+ int SUID = Node2CurrentBlock[i];
+ for (SDep& SuccDep : SU->Succs) {
+ SUnit *Succ = SuccDep.getSUnit();
+ if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
+ continue;
+ if (Node2CurrentBlock[Succ->NodeNum] != SUID)
+ CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]]);
+ }
+ for (SDep& PredDep : SU->Preds) {
+ SUnit *Pred = PredDep.getSUnit();
+ if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
+ continue;
+ if (Node2CurrentBlock[Pred->NodeNum] != SUID)
+ CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->NodeNum]]);
+ }
+ }
+
+ // Free root and leafs of all blocks to enable scheduling inside them.
+ for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
+ SIScheduleBlock *Block = CurrentBlocks[i];
+ Block->finalizeUnits();
+ }
+ DEBUG(
+ dbgs() << "Blocks created:\n\n";
+ for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
+ SIScheduleBlock *Block = CurrentBlocks[i];
+ Block->printDebug(true);
+ }
+ );
+}
+
+// Two functions taken from Codegen/MachineScheduler.cpp
+
+/// If this iterator is a debug value, increment until reaching the End or a
+/// non-debug instruction.
+static MachineBasicBlock::const_iterator
+nextIfDebug(MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator End) {
+ for(; I != End; ++I) {
+ if (!I->isDebugValue())
+ break;
+ }
+ return I;
+}
+
+/// Non-const version.
+static MachineBasicBlock::iterator
+nextIfDebug(MachineBasicBlock::iterator I,
+ MachineBasicBlock::const_iterator End) {
+ // Cast the return value to nonconst MachineInstr, then cast to an
+ // instr_iterator, which does not check for null, finally return a
+ // bundle_iterator.
+ return MachineBasicBlock::instr_iterator(
+ const_cast<MachineInstr*>(
+ &*nextIfDebug(MachineBasicBlock::const_iterator(I), End)));
+}
+
+void SIScheduleBlockCreator::topologicalSort() {
+ unsigned DAGSize = CurrentBlocks.size();
+ std::vector<int> WorkList;
+
+ DEBUG(dbgs() << "Topological Sort\n");
+
+ WorkList.reserve(DAGSize);
+ TopDownIndex2Block.resize(DAGSize);
+ TopDownBlock2Index.resize(DAGSize);
+ BottomUpIndex2Block.resize(DAGSize);
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SIScheduleBlock *Block = CurrentBlocks[i];
+ unsigned Degree = Block->getSuccs().size();
+ TopDownBlock2Index[i] = Degree;
+ if (Degree == 0) {
+ WorkList.push_back(i);
+ }
+ }
+
+ int Id = DAGSize;
+ while (!WorkList.empty()) {
+ int i = WorkList.back();
+ SIScheduleBlock *Block = CurrentBlocks[i];
+ WorkList.pop_back();
+ TopDownBlock2Index[i] = --Id;
+ TopDownIndex2Block[Id] = i;
+ for (SIScheduleBlock* Pred : Block->getPreds()) {
+ if (!--TopDownBlock2Index[Pred->getID()])
+ WorkList.push_back(Pred->getID());
+ }
+ }
+
+#ifndef NDEBUG
+ // Check correctness of the ordering.
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SIScheduleBlock *Block = CurrentBlocks[i];
+ for (SIScheduleBlock* Pred : Block->getPreds()) {
+ assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&
+ "Wrong Top Down topological sorting");
+ }
+ }
+#endif
+
+ BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
+ TopDownIndex2Block.rend());
+}
+
+void SIScheduleBlockCreator::scheduleInsideBlocks() {
+ unsigned DAGSize = CurrentBlocks.size();
+
+ DEBUG(dbgs() << "\nScheduling Blocks\n\n");
+
+ // We do schedule a valid scheduling such that a Block corresponds
+ // to a range of instructions.
+ DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n");
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SIScheduleBlock *Block = CurrentBlocks[i];
+ Block->fastSchedule();
+ }
+
+ // Note: the following code, and the part restoring previous position
+ // is by far the most expensive operation of the Scheduler.
+
+ // Do not update CurrentTop.
+ MachineBasicBlock::iterator CurrentTopFastSched = DAG->getCurrentTop();
+ std::vector<MachineBasicBlock::iterator> PosOld;
+ std::vector<MachineBasicBlock::iterator> PosNew;
+ PosOld.reserve(DAG->SUnits.size());
+ PosNew.reserve(DAG->SUnits.size());
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ int BlockIndice = TopDownIndex2Block[i];
+ SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
+ std::vector<SUnit*> SUs = Block->getScheduledUnits();
+
+ for (SUnit* SU : SUs) {
+ MachineInstr *MI = SU->getInstr();
+ MachineBasicBlock::iterator Pos = MI;
+ PosOld.push_back(Pos);
+ if (&*CurrentTopFastSched == MI) {
+ PosNew.push_back(Pos);
+ CurrentTopFastSched = nextIfDebug(++CurrentTopFastSched,
+ DAG->getCurrentBottom());
+ } else {
+ // Update the instruction stream.
+ DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI);
+
+ // Update LiveIntervals.
+ // Note: Moving all instructions and calling handleMove everytime
+ // is the most cpu intensive operation of the scheduler.
+ // It would gain a lot if there was a way to recompute the
+ // LiveIntervals for the entire scheduling region.
+ DAG->getLIS()->handleMove(MI, /*UpdateFlags=*/true);
+ PosNew.push_back(CurrentTopFastSched);
+ }
+ }
+ }
+
+ // Now we have Block of SUs == Block of MI.
+ // We do the final schedule for the instructions inside the block.
+ // The property that all the SUs of the Block are grouped together as MI
+ // is used for correct reg usage tracking.
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SIScheduleBlock *Block = CurrentBlocks[i];
+ std::vector<SUnit*> SUs = Block->getScheduledUnits();
+ Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
+ }
+
+ DEBUG(dbgs() << "Restoring MI Pos\n");
+ // Restore old ordering (which prevents a LIS->handleMove bug).
+ for (unsigned i = PosOld.size(), e = 0; i != e; --i) {
+ MachineBasicBlock::iterator POld = PosOld[i-1];
+ MachineBasicBlock::iterator PNew = PosNew[i-1];
+ if (PNew != POld) {
+ // Update the instruction stream.
+ DAG->getBB()->splice(POld, DAG->getBB(), PNew);
+
+ // Update LiveIntervals.
+ DAG->getLIS()->handleMove(POld, /*UpdateFlags=*/true);
+ }
+ }
+
+ DEBUG(
+ for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
+ SIScheduleBlock *Block = CurrentBlocks[i];
+ Block->printDebug(true);
+ }
+ );
+}
+
+void SIScheduleBlockCreator::fillStats() {
+ unsigned DAGSize = CurrentBlocks.size();
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ int BlockIndice = TopDownIndex2Block[i];
+ SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
+ if (Block->getPreds().size() == 0)
+ Block->Depth = 0;
+ else {
+ unsigned Depth = 0;
+ for (SIScheduleBlock *Pred : Block->getPreds()) {
+ if (Depth < Pred->Depth + 1)
+ Depth = Pred->Depth + 1;
+ }
+ Block->Depth = Depth;
+ }
+ }
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ int BlockIndice = BottomUpIndex2Block[i];
+ SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
+ if (Block->getSuccs().size() == 0)
+ Block->Height = 0;
+ else {
+ unsigned Height = 0;
+ for (SIScheduleBlock *Succ : Block->getSuccs()) {
+ if (Height < Succ->Height + 1)
+ Height = Succ->Height + 1;
+ }
+ Block->Height = Height;
+ }
+ }
+}
+
+// SIScheduleBlockScheduler //
+
+SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
+ SISchedulerBlockSchedulerVariant Variant,
+ SIScheduleBlocks BlocksStruct) :
+ DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks),
+ LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
+ SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
+
+ // Fill the usage of every output
+ // Warning: while by construction we always have a link between two blocks
+ // when one needs a result from the other, the number of users of an output
+ // is not the sum of child blocks having as input the same virtual register.
+ // Here is an example. A produces x and y. B eats x and produces x'.
+ // C eats x' and y. The register coalescer may have attributed the same
+ // virtual register to x and x'.
+ // To count accurately, we do a topological sort. In case the register is
+ // found for several parents, we increment the usage of the one with the
+ // highest topological index.
+ LiveOutRegsNumUsages.resize(Blocks.size());
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ SIScheduleBlock *Block = Blocks[i];
+ for (unsigned Reg : Block->getInRegs()) {
+ bool Found = false;
+ int topoInd = -1;
+ for (SIScheduleBlock* Pred: Block->getPreds()) {
+ std::set<unsigned> PredOutRegs = Pred->getOutRegs();
+ std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
+
+ if (RegPos != PredOutRegs.end()) {
+ Found = true;
+ if (topoInd < BlocksStruct.TopDownBlock2Index[Pred->getID()]) {
+ topoInd = BlocksStruct.TopDownBlock2Index[Pred->getID()];
+ }
+ }
+ }
+
+ if (!Found)
+ continue;
+
+ int PredID = BlocksStruct.TopDownIndex2Block[topoInd];
+ std::map<unsigned, unsigned>::iterator RegPos =
+ LiveOutRegsNumUsages[PredID].find(Reg);
+ if (RegPos != LiveOutRegsNumUsages[PredID].end()) {
+ ++LiveOutRegsNumUsages[PredID][Reg];
+ } else {
+ LiveOutRegsNumUsages[PredID][Reg] = 1;
+ }
+ }
+ }
+
+ LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0);
+ BlockNumPredsLeft.resize(Blocks.size());
+ BlockNumSuccsLeft.resize(Blocks.size());
+
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ SIScheduleBlock *Block = Blocks[i];
+ BlockNumPredsLeft[i] = Block->getPreds().size();
+ BlockNumSuccsLeft[i] = Block->getSuccs().size();
+ }
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ SIScheduleBlock *Block = Blocks[i];
+ assert(Block->getID() == i);
+ }
+#endif
+
+ std::set<unsigned> InRegs = DAG->getInRegs();
+ addLiveRegs(InRegs);
+
+ // Fill LiveRegsConsumers for regs that were already
+ // defined before scheduling.
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ SIScheduleBlock *Block = Blocks[i];
+ for (unsigned Reg : Block->getInRegs()) {
+ bool Found = false;
+ for (SIScheduleBlock* Pred: Block->getPreds()) {
+ std::set<unsigned> PredOutRegs = Pred->getOutRegs();
+ std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
+
+ if (RegPos != PredOutRegs.end()) {
+ Found = true;
+ break;
+ }
+ }
+
+ if (!Found) {
+ if (LiveRegsConsumers.find(Reg) == LiveRegsConsumers.end())
+ LiveRegsConsumers[Reg] = 1;
+ else
+ ++LiveRegsConsumers[Reg];
+ }
+ }
+ }
+
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ SIScheduleBlock *Block = Blocks[i];
+ if (BlockNumPredsLeft[i] == 0) {
+ ReadyBlocks.push_back(Block);
+ }
+ }
+
+ while (SIScheduleBlock *Block = pickBlock()) {
+ BlocksScheduled.push_back(Block);
+ blockScheduled(Block);
+ }
+
+ DEBUG(
+ dbgs() << "Block Order:";
+ for (SIScheduleBlock* Block : BlocksScheduled) {
+ dbgs() << ' ' << Block->getID();
+ }
+ );
+}
+
+bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
+ SIBlockSchedCandidate &TryCand) {
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
+ return true;
+ }
+
+ // Try to hide high latencies.
+ if (tryLess(TryCand.LastPosHighLatParentScheduled,
+ Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency))
+ return true;
+ // Schedule high latencies early so you can hide them better.
+ if (tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency,
+ TryCand, Cand, Latency))
+ return true;
+ if (TryCand.IsHighLatency && tryGreater(TryCand.Height, Cand.Height,
+ TryCand, Cand, Depth))
+ return true;
+ if (tryGreater(TryCand.NumHighLatencySuccessors,
+ Cand.NumHighLatencySuccessors,
+ TryCand, Cand, Successor))
+ return true;
+ return false;
+}
+
+bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
+ SIBlockSchedCandidate &TryCand) {
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
+ return true;
+ }
+
+ if (tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0,
+ TryCand, Cand, RegUsage))
+ return true;
+ if (tryGreater(TryCand.NumSuccessors > 0,
+ Cand.NumSuccessors > 0,
+ TryCand, Cand, Successor))
+ return true;
+ if (tryGreater(TryCand.Height, Cand.Height, TryCand, Cand, Depth))
+ return true;
+ if (tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff,
+ TryCand, Cand, RegUsage))
+ return true;
+ return false;
+}
+
+SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
+ SIBlockSchedCandidate Cand;
+ std::vector<SIScheduleBlock*>::iterator Best;
+ SIScheduleBlock *Block;
+ if (ReadyBlocks.empty())
+ return nullptr;
+
+ DAG->fillVgprSgprCost(LiveRegs.begin(), LiveRegs.end(),
+ VregCurrentUsage, SregCurrentUsage);
+ if (VregCurrentUsage > maxVregUsage)
+ maxVregUsage = VregCurrentUsage;
+ if (VregCurrentUsage > maxSregUsage)
+ maxSregUsage = VregCurrentUsage;
+ DEBUG(
+ dbgs() << "Picking New Blocks\n";
+ dbgs() << "Available: ";
+ for (SIScheduleBlock* Block : ReadyBlocks)
+ dbgs() << Block->getID() << ' ';
+ dbgs() << "\nCurrent Live:\n";
+ for (unsigned Reg : LiveRegs)
+ dbgs() << PrintVRegOrUnit(Reg, DAG->getTRI()) << ' ';
+ dbgs() << '\n';
+ dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n';
+ dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';
+ );
+
+ Cand.Block = nullptr;
+ for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),
+ E = ReadyBlocks.end(); I != E; ++I) {
+ SIBlockSchedCandidate TryCand;
+ TryCand.Block = *I;
+ TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
+ TryCand.VGPRUsageDiff =
+ checkRegUsageImpact(TryCand.Block->getInRegs(),
+ TryCand.Block->getOutRegs())[DAG->getVGPRSetID()];
+ TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
+ TryCand.NumHighLatencySuccessors =
+ TryCand.Block->getNumHighLatencySuccessors();
+ TryCand.LastPosHighLatParentScheduled =
+ (unsigned int) std::max<int> (0,
+ LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
+ LastPosWaitedHighLatency);
+ TryCand.Height = TryCand.Block->Height;
+ // Try not to increase VGPR usage too much, else we may spill.
+ if (VregCurrentUsage > 120 ||
+ Variant != SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage) {
+ if (!tryCandidateRegUsage(Cand, TryCand) &&
+ Variant != SISchedulerBlockSchedulerVariant::BlockRegUsage)
+ tryCandidateLatency(Cand, TryCand);
+ } else {
+ if (!tryCandidateLatency(Cand, TryCand))
+ tryCandidateRegUsage(Cand, TryCand);
+ }
+ if (TryCand.Reason != NoCand) {
+ Cand.setBest(TryCand);
+ Best = I;
+ DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' '
+ << getReasonStr(Cand.Reason) << '\n');
+ }
+ }
+
+ DEBUG(
+ dbgs() << "Picking: " << Cand.Block->getID() << '\n';
+ dbgs() << "Is a block with high latency instruction: "
+ << (Cand.IsHighLatency ? "yes\n" : "no\n");
+ dbgs() << "Position of last high latency dependency: "
+ << Cand.LastPosHighLatParentScheduled << '\n';
+ dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n';
+ dbgs() << '\n';
+ );
+
+ Block = Cand.Block;
+ ReadyBlocks.erase(Best);
+ return Block;
+}
+
+// Tracking of currently alive registers to determine VGPR Usage.
+
+void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
+ for (unsigned Reg : Regs) {
+ // For now only track virtual registers.
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ // If not already in the live set, then add it.
+ (void) LiveRegs.insert(Reg);
+ }
+}
+
+void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block,
+ std::set<unsigned> &Regs) {
+ for (unsigned Reg : Regs) {
+ // For now only track virtual registers.
+ std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
+ assert (Pos != LiveRegs.end() && // Reg must be live.
+ LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&
+ LiveRegsConsumers[Reg] >= 1);
+ --LiveRegsConsumers[Reg];
+ if (LiveRegsConsumers[Reg] == 0)
+ LiveRegs.erase(Pos);
+ }
+}
+
+void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) {
+ for (SIScheduleBlock* Block : Parent->getSuccs()) {
+ --BlockNumPredsLeft[Block->getID()];
+ if (BlockNumPredsLeft[Block->getID()] == 0) {
+ ReadyBlocks.push_back(Block);
+ }
+ // TODO: Improve check. When the dependency between the high latency
+ // instructions and the instructions of the other blocks are WAR or WAW
+ // there will be no wait triggered. We would like these cases to not
+ // update LastPosHighLatencyParentScheduled.
+ if (Parent->isHighLatencyBlock())
+ LastPosHighLatencyParentScheduled[Block->getID()] = NumBlockScheduled;
+ }
+}
+
+void SIScheduleBlockScheduler::blockScheduled(SIScheduleBlock *Block) {
+ decreaseLiveRegs(Block, Block->getInRegs());
+ addLiveRegs(Block->getOutRegs());
+ releaseBlockSuccs(Block);
+ for (std::map<unsigned, unsigned>::iterator RegI =
+ LiveOutRegsNumUsages[Block->getID()].begin(),
+ E = LiveOutRegsNumUsages[Block->getID()].end(); RegI != E; ++RegI) {
+ std::pair<unsigned, unsigned> RegP = *RegI;
+ if (LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end())
+ LiveRegsConsumers[RegP.first] = RegP.second;
+ else {
+ assert(LiveRegsConsumers[RegP.first] == 0);
+ LiveRegsConsumers[RegP.first] += RegP.second;
+ }
+ }
+ if (LastPosHighLatencyParentScheduled[Block->getID()] >
+ (unsigned)LastPosWaitedHighLatency)
+ LastPosWaitedHighLatency =
+ LastPosHighLatencyParentScheduled[Block->getID()];
+ ++NumBlockScheduled;
+}
+
+std::vector<int>
+SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
+ std::set<unsigned> &OutRegs) {
+ std::vector<int> DiffSetPressure;
+ DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0);
+
+ for (unsigned Reg : InRegs) {
+ // For now only track virtual registers.
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (LiveRegsConsumers[Reg] > 1)
+ continue;
+ PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
+ for (; PSetI.isValid(); ++PSetI) {
+ DiffSetPressure[*PSetI] -= PSetI.getWeight();
+ }
+ }
+
+ for (unsigned Reg : OutRegs) {
+ // For now only track virtual registers.
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
+ for (; PSetI.isValid(); ++PSetI) {
+ DiffSetPressure[*PSetI] += PSetI.getWeight();
+ }
+ }
+
+ return DiffSetPressure;
+}
+
+// SIScheduler //
+
+struct SIScheduleBlockResult
+SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
+ SISchedulerBlockSchedulerVariant ScheduleVariant) {
+ SIScheduleBlocks Blocks = BlockCreator.getBlocks(BlockVariant);
+ SIScheduleBlockScheduler Scheduler(DAG, ScheduleVariant, Blocks);
+ std::vector<SIScheduleBlock*> ScheduledBlocks;
+ struct SIScheduleBlockResult Res;
+
+ ScheduledBlocks = Scheduler.getBlocks();
+
+ for (unsigned b = 0; b < ScheduledBlocks.size(); ++b) {
+ SIScheduleBlock *Block = ScheduledBlocks[b];
+ std::vector<SUnit*> SUs = Block->getScheduledUnits();
+
+ for (SUnit* SU : SUs)
+ Res.SUs.push_back(SU->NodeNum);
+ }
+
+ Res.MaxSGPRUsage = Scheduler.getSGPRUsage();
+ Res.MaxVGPRUsage = Scheduler.getVGPRUsage();
+ return Res;
+}
+
+// SIScheduleDAGMI //
+
+SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) :
+ ScheduleDAGMILive(C, make_unique<GenericScheduler>(C)) {
+ SITII = static_cast<const SIInstrInfo*>(TII);
+ SITRI = static_cast<const SIRegisterInfo*>(TRI);
+
+ VGPRSetID = SITRI->getVGPR32PressureSet();
+ SGPRSetID = SITRI->getSGPR32PressureSet();
+}
+
+SIScheduleDAGMI::~SIScheduleDAGMI() {
+}
+
+ScheduleDAGInstrs *llvm::createSIMachineScheduler(MachineSchedContext *C) {
+ return new SIScheduleDAGMI(C);
+}
+
+// Code adapted from scheduleDAG.cpp
+// Does a topological sort over the SUs.
+// Both TopDown and BottomUp
+void SIScheduleDAGMI::topologicalSort() {
+ std::vector<int> TopDownSU2Index;
+ unsigned DAGSize = SUnits.size();
+ std::vector<SUnit*> WorkList;
+
+ DEBUG(dbgs() << "Topological Sort\n");
+ WorkList.reserve(DAGSize);
+
+ TopDownIndex2SU.resize(DAGSize);
+ TopDownSU2Index.resize(DAGSize);
+ BottomUpIndex2SU.resize(DAGSize);
+
+ WorkList.push_back(&getExitSU());
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ int NodeNum = SU->NodeNum;
+ unsigned Degree = SU->Succs.size();
+ TopDownSU2Index[NodeNum] = Degree;
+ if (Degree == 0) {
+ assert(SU->Succs.empty() && "SUnit should have no successors");
+ WorkList.push_back(SU);
+ }
+ }
+
+ int Id = DAGSize;
+ while (!WorkList.empty()) {
+ SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ if (SU->NodeNum < DAGSize) {
+ TopDownSU2Index[SU->NodeNum] = --Id;
+ TopDownIndex2SU[Id] = SU->NodeNum;
+ }
+ for (SDep& Pred : SU->Preds) {
+ SUnit *SU = Pred.getSUnit();
+ if (SU->NodeNum < DAGSize && !--TopDownSU2Index[SU->NodeNum])
+ WorkList.push_back(SU);
+ }
+ }
+
+ BottomUpIndex2SU = std::vector<int>(TopDownIndex2SU.rbegin(),
+ TopDownIndex2SU.rend());
+
+#ifndef NDEBUG
+ // Check correctness of the ordering
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ for (SDep& Pred : SU->Preds) {
+ if (Pred.getSUnit()->NodeNum >= DAGSize)
+ continue;
+ assert(TopDownSU2Index[SU->NodeNum] >
+ TopDownSU2Index[Pred.getSUnit()->NodeNum] &&
+ "Wrong Top Down topological sorting");
+ }
+ }
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ for (SDep& Succ : SU->Succs) {
+ if (Succ.getSUnit()->NodeNum >= DAGSize)
+ continue;
+ assert(TopDownSU2Index[SU->NodeNum] <
+ TopDownSU2Index[Succ.getSUnit()->NodeNum] &&
+ "Wrong Bottom Up topological sorting");
+ }
+ }
+#endif
+}
+
+// Move low latencies further from their user without
+// increasing SGPR usage (in general)
+// This is to be replaced by a better pass that would
+// take into account SGPR usage (based on VGPR Usage
+// and the corresponding wavefront count), that would
+// try to merge groups of loads if it make sense, etc
+void SIScheduleDAGMI::moveLowLatencies() {
+ unsigned DAGSize = SUnits.size();
+ int LastLowLatencyUser = -1;
+ int LastLowLatencyPos = -1;
+
+ for (unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
+ SUnit *SU = &SUnits[ScheduledSUnits[i]];
+ bool IsLowLatencyUser = false;
+ unsigned MinPos = 0;
+
+ for (SDep& PredDep : SU->Preds) {
+ SUnit *Pred = PredDep.getSUnit();
+ if (SITII->isLowLatencyInstruction(Pred->getInstr())) {
+ IsLowLatencyUser = true;
+ }
+ if (Pred->NodeNum >= DAGSize)
+ continue;
+ unsigned PredPos = ScheduledSUnitsInv[Pred->NodeNum];
+ if (PredPos >= MinPos)
+ MinPos = PredPos + 1;
+ }
+
+ if (SITII->isLowLatencyInstruction(SU->getInstr())) {
+ unsigned BestPos = LastLowLatencyUser + 1;
+ if ((int)BestPos <= LastLowLatencyPos)
+ BestPos = LastLowLatencyPos + 1;
+ if (BestPos < MinPos)
+ BestPos = MinPos;
+ if (BestPos < i) {
+ for (unsigned u = i; u > BestPos; --u) {
+ ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
+ ScheduledSUnits[u] = ScheduledSUnits[u-1];
+ }
+ ScheduledSUnits[BestPos] = SU->NodeNum;
+ ScheduledSUnitsInv[SU->NodeNum] = BestPos;
+ }
+ LastLowLatencyPos = BestPos;
+ if (IsLowLatencyUser)
+ LastLowLatencyUser = BestPos;
+ } else if (IsLowLatencyUser) {
+ LastLowLatencyUser = i;
+ // Moves COPY instructions on which depends
+ // the low latency instructions too.
+ } else if (SU->getInstr()->getOpcode() == AMDGPU::COPY) {
+ bool CopyForLowLat = false;
+ for (SDep& SuccDep : SU->Succs) {
+ SUnit *Succ = SuccDep.getSUnit();
+ if (SITII->isLowLatencyInstruction(Succ->getInstr())) {
+ CopyForLowLat = true;
+ }
+ }
+ if (!CopyForLowLat)
+ continue;
+ if (MinPos < i) {
+ for (unsigned u = i; u > MinPos; --u) {
+ ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
+ ScheduledSUnits[u] = ScheduledSUnits[u-1];
+ }
+ ScheduledSUnits[MinPos] = SU->NodeNum;
+ ScheduledSUnitsInv[SU->NodeNum] = MinPos;
+ }
+ }
+ }
+}
+
+void SIScheduleDAGMI::restoreSULinksLeft() {
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ SUnits[i].isScheduled = false;
+ SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
+ SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
+ SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
+ SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
+ }
+}
+
+// Return the Vgpr and Sgpr usage corresponding to some virtual registers.
+template<typename _Iterator> void
+SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End,
+ unsigned &VgprUsage, unsigned &SgprUsage) {
+ VgprUsage = 0;
+ SgprUsage = 0;
+ for (_Iterator RegI = First; RegI != End; ++RegI) {
+ unsigned Reg = *RegI;
+ // For now only track virtual registers
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ PSetIterator PSetI = MRI.getPressureSets(Reg);
+ for (; PSetI.isValid(); ++PSetI) {
+ if (*PSetI == VGPRSetID)
+ VgprUsage += PSetI.getWeight();
+ else if (*PSetI == SGPRSetID)
+ SgprUsage += PSetI.getWeight();
+ }
+ }
+}
+
+void SIScheduleDAGMI::schedule()
+{
+ SmallVector<SUnit*, 8> TopRoots, BotRoots;
+ SIScheduleBlockResult Best, Temp;
+ DEBUG(dbgs() << "Preparing Scheduling\n");
+
+ buildDAGWithRegPressure();
+ DEBUG(
+ for(SUnit& SU : SUnits)
+ SU.dumpAll(this)
+ );
+
+ Topo.InitDAGTopologicalSorting();
+ topologicalSort();
+ findRootsAndBiasEdges(TopRoots, BotRoots);
+ // We reuse several ScheduleDAGMI and ScheduleDAGMILive
+ // functions, but to make them happy we must initialize
+ // the default Scheduler implementation (even if we do not
+ // run it)
+ SchedImpl->initialize(this);
+ initQueues(TopRoots, BotRoots);
+
+ // Fill some stats to help scheduling.
+
+ SUnitsLinksBackup = SUnits;
+ IsLowLatencySU.clear();
+ LowLatencyOffset.clear();
+ IsHighLatencySU.clear();
+
+ IsLowLatencySU.resize(SUnits.size(), 0);
+ LowLatencyOffset.resize(SUnits.size(), 0);
+ IsHighLatencySU.resize(SUnits.size(), 0);
+
+ for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ unsigned BaseLatReg, OffLatReg;
+ if (SITII->isLowLatencyInstruction(SU->getInstr())) {
+ IsLowLatencySU[i] = 1;
+ if (SITII->getMemOpBaseRegImmOfs(SU->getInstr(), BaseLatReg,
+ OffLatReg, TRI))
+ LowLatencyOffset[i] = OffLatReg;
+ } else if (SITII->isHighLatencyInstruction(SU->getInstr()))
+ IsHighLatencySU[i] = 1;
+ }
+
+ SIScheduler Scheduler(this);
+ Best = Scheduler.scheduleVariant(SISchedulerBlockCreatorVariant::LatenciesAlone,
+ SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage);
+#if 0 // To enable when handleMove fix lands
+ // if VGPR usage is extremely high, try other good performing variants
+ // which could lead to lower VGPR usage
+ if (Best.MaxVGPRUsage > 180) {
+ std::vector<std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant>> Variants = {
+ { LatenciesAlone, BlockRegUsageLatency },
+// { LatenciesAlone, BlockRegUsage },
+ { LatenciesGrouped, BlockLatencyRegUsage },
+// { LatenciesGrouped, BlockRegUsageLatency },
+// { LatenciesGrouped, BlockRegUsage },
+ { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
+// { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
+// { LatenciesAlonePlusConsecutive, BlockRegUsage }
+ };
+ for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
+ Temp = Scheduler.scheduleVariant(v.first, v.second);
+ if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
+ Best = Temp;
+ }
+ }
+ // if VGPR usage is still extremely high, we may spill. Try other variants
+ // which are less performing, but that could lead to lower VGPR usage.
+ if (Best.MaxVGPRUsage > 200) {
+ std::vector<std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant>> Variants = {
+// { LatenciesAlone, BlockRegUsageLatency },
+ { LatenciesAlone, BlockRegUsage },
+// { LatenciesGrouped, BlockLatencyRegUsage },
+ { LatenciesGrouped, BlockRegUsageLatency },
+ { LatenciesGrouped, BlockRegUsage },
+// { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
+ { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
+ { LatenciesAlonePlusConsecutive, BlockRegUsage }
+ };
+ for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
+ Temp = Scheduler.scheduleVariant(v.first, v.second);
+ if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
+ Best = Temp;
+ }
+ }
+#endif
+ ScheduledSUnits = Best.SUs;
+ ScheduledSUnitsInv.resize(SUnits.size());
+
+ for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
+ ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
+ }
+
+ moveLowLatencies();
+
+ // Tell the outside world about the result of the scheduling.
+
+ assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
+ TopRPTracker.setPos(CurrentTop);
+
+ for (std::vector<unsigned>::iterator I = ScheduledSUnits.begin(),
+ E = ScheduledSUnits.end(); I != E; ++I) {
+ SUnit *SU = &SUnits[*I];
+
+ scheduleMI(SU, true);
+
+ DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
+ << *SU->getInstr());
+ }
+
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+ placeDebugValues();
+
+ DEBUG({
+ unsigned BBNum = begin()->getParent()->getNumber();
+ dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.h b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
new file mode 100644
index 0000000..b270136
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
@@ -0,0 +1,489 @@
+//===-- SIMachineScheduler.h - SI Scheduler Interface -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI Machine Scheduler interface
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
+
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+enum SIScheduleCandReason {
+ NoCand,
+ RegUsage,
+ Latency,
+ Successor,
+ Depth,
+ NodeOrder
+};
+
+struct SISchedulerCandidate {
+ // The reason for this candidate.
+ SIScheduleCandReason Reason;
+
+ // Set of reasons that apply to multiple candidates.
+ uint32_t RepeatReasonSet;
+
+ SISchedulerCandidate()
+ : Reason(NoCand), RepeatReasonSet(0) {}
+
+ bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
+ void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
+};
+
+class SIScheduleDAGMI;
+class SIScheduleBlockCreator;
+
+class SIScheduleBlock {
+ SIScheduleDAGMI *DAG;
+ SIScheduleBlockCreator *BC;
+
+ std::vector<SUnit*> SUnits;
+ std::map<unsigned, unsigned> NodeNum2Index;
+ std::vector<SUnit*> TopReadySUs;
+ std::vector<SUnit*> ScheduledSUnits;
+
+ /// The top of the unscheduled zone.
+ IntervalPressure TopPressure;
+ RegPressureTracker TopRPTracker;
+
+ // Pressure: number of said class of registers needed to
+ // store the live virtual and real registers.
+ // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
+ // Pressure of additional registers required inside the block.
+ std::vector<unsigned> InternalAdditionnalPressure;
+ // Pressure of input and output registers
+ std::vector<unsigned> LiveInPressure;
+ std::vector<unsigned> LiveOutPressure;
+ // Registers required by the block, and outputs.
+ // We do track only virtual registers.
+ // Note that some registers are not 32 bits,
+ // and thus the pressure is not equal
+ // to the number of live registers.
+ std::set<unsigned> LiveInRegs;
+ std::set<unsigned> LiveOutRegs;
+
+ bool Scheduled;
+ bool HighLatencyBlock;
+
+ std::vector<unsigned> HasLowLatencyNonWaitedParent;
+
+ // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
+ unsigned ID;
+
+ std::vector<SIScheduleBlock*> Preds; // All blocks predecessors.
+ std::vector<SIScheduleBlock*> Succs; // All blocks successors.
+ unsigned NumHighLatencySuccessors;
+
+public:
+ SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
+ unsigned ID):
+ DAG(DAG), BC(BC), SUnits(), TopReadySUs(), ScheduledSUnits(),
+ TopRPTracker(TopPressure), Scheduled(false),
+ HighLatencyBlock(false), ID(ID),
+ Preds(), Succs(), NumHighLatencySuccessors(0) {};
+
+ ~SIScheduleBlock() {};
+
+ unsigned getID() const { return ID; }
+
+ /// Functions for Block construction.
+ void addUnit(SUnit *SU);
+
+ // When all SUs have been added.
+ void finalizeUnits();
+
+ // Add block pred, which has instruction predecessor of SU.
+ void addPred(SIScheduleBlock *Pred);
+ void addSucc(SIScheduleBlock *Succ);
+
+ const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
+ const std::vector<SIScheduleBlock*>& getSuccs() const { return Succs; }
+
+ unsigned Height; // Maximum topdown path length to block without outputs
+ unsigned Depth; // Maximum bottomup path length to block without inputs
+
+ unsigned getNumHighLatencySuccessors() const {
+ return NumHighLatencySuccessors;
+ }
+
+ bool isHighLatencyBlock() { return HighLatencyBlock; }
+
+ // This is approximative.
+ // Ideally should take into accounts some instructions (rcp, etc)
+ // are 4 times slower.
+ int getCost() { return SUnits.size(); }
+
+ // The block Predecessors and Successors must be all registered
+ // before fastSchedule().
+ // Fast schedule with no particular requirement.
+ void fastSchedule();
+
+ std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
+
+ // Complete schedule that will try to minimize reg pressure and
+ // low latencies, and will fill liveins and liveouts.
+ // Needs all MIs to be grouped between BeginBlock and EndBlock.
+ // The MIs can be moved after the scheduling,
+ // it is just used to allow correct track of live registers.
+ void schedule(MachineBasicBlock::iterator BeginBlock,
+ MachineBasicBlock::iterator EndBlock);
+
+ bool isScheduled() { return Scheduled; }
+
+
+ // Needs the block to be scheduled inside
+ // TODO: find a way to compute it.
+ std::vector<unsigned> &getInternalAdditionnalRegUsage() {
+ return InternalAdditionnalPressure;
+ }
+
+ std::set<unsigned> &getInRegs() { return LiveInRegs; }
+ std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
+
+ void printDebug(bool Full);
+
+private:
+ struct SISchedCandidate : SISchedulerCandidate {
+ // The best SUnit candidate.
+ SUnit *SU;
+
+ unsigned SGPRUsage;
+ unsigned VGPRUsage;
+ bool IsLowLatency;
+ unsigned LowLatencyOffset;
+ bool HasLowLatencyNonWaitedParent;
+
+ SISchedCandidate()
+ : SU(nullptr) {}
+
+ bool isValid() const { return SU; }
+
+ // Copy the status of another candidate without changing policy.
+ void setBest(SISchedCandidate &Best) {
+ assert(Best.Reason != NoCand && "uninitialized Sched candidate");
+ SU = Best.SU;
+ Reason = Best.Reason;
+ SGPRUsage = Best.SGPRUsage;
+ VGPRUsage = Best.VGPRUsage;
+ IsLowLatency = Best.IsLowLatency;
+ LowLatencyOffset = Best.LowLatencyOffset;
+ HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
+ }
+ };
+
+ void undoSchedule();
+
+ void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
+ void releaseSucc(SUnit *SU, SDep *SuccEdge);
+ // InOrOutBlock: restrict to links pointing inside the block (true),
+ // or restrict to links pointing outside the block (false).
+ void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
+
+ void nodeScheduled(SUnit *SU);
+ void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
+ void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
+ SUnit* pickNode();
+ void traceCandidate(const SISchedCandidate &Cand);
+ void initRegPressure(MachineBasicBlock::iterator BeginBlock,
+ MachineBasicBlock::iterator EndBlock);
+};
+
+struct SIScheduleBlocks {
+ std::vector<SIScheduleBlock*> Blocks;
+ std::vector<int> TopDownIndex2Block;
+ std::vector<int> TopDownBlock2Index;
+};
+
+enum SISchedulerBlockCreatorVariant {
+ LatenciesAlone,
+ LatenciesGrouped,
+ LatenciesAlonePlusConsecutive
+};
+
+class SIScheduleBlockCreator {
+ SIScheduleDAGMI *DAG;
+ // unique_ptr handles freeing memory for us.
+ std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
+ std::map<SISchedulerBlockCreatorVariant,
+ SIScheduleBlocks> Blocks;
+ std::vector<SIScheduleBlock*> CurrentBlocks;
+ std::vector<int> Node2CurrentBlock;
+
+ // Topological sort
+ // Maps topological index to the node number.
+ std::vector<int> TopDownIndex2Block;
+ std::vector<int> TopDownBlock2Index;
+ std::vector<int> BottomUpIndex2Block;
+
+ // 0 -> Color not given.
+ // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
+ // Above -> Other groups.
+ int NextReservedID;
+ int NextNonReservedID;
+ std::vector<int> CurrentColoring;
+ std::vector<int> CurrentTopDownReservedDependencyColoring;
+ std::vector<int> CurrentBottomUpReservedDependencyColoring;
+
+public:
+ SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
+ ~SIScheduleBlockCreator();
+
+ SIScheduleBlocks
+ getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
+
+ bool isSUInBlock(SUnit *SU, unsigned ID);
+
+private:
+ // Give a Reserved color to every high latency.
+ void colorHighLatenciesAlone();
+
+ // Create groups of high latencies with a Reserved color.
+ void colorHighLatenciesGroups();
+
+ // Compute coloring for topdown and bottom traversals with
+ // different colors depending on dependencies on Reserved colors.
+ void colorComputeReservedDependencies();
+
+ // Give color to all non-colored SUs according to Reserved groups dependencies.
+ void colorAccordingToReservedDependencies();
+
+ // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
+ // The new colors are computed according to the dependencies on the other blocks
+ // formed with colorAccordingToReservedDependencies.
+ void colorEndsAccordingToDependencies();
+
+ // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
+ void colorForceConsecutiveOrderInGroup();
+
+ // Merge Constant loads that have all their users into another group to the group.
+ // (TODO: else if all their users depend on the same group, put them there)
+ void colorMergeConstantLoadsNextGroup();
+
+ // Merge SUs that have all their users into another group to the group
+ void colorMergeIfPossibleNextGroup();
+
+ // Merge SUs that have all their users into another group to the group,
+ // but only for Reserved groups.
+ void colorMergeIfPossibleNextGroupOnlyForReserved();
+
+ // Merge SUs that have all their users into another group to the group,
+ // but only if the group is no more than a few SUs.
+ void colorMergeIfPossibleSmallGroupsToNextGroup();
+
+ // Divides Blocks with important size.
+ // Idea of implementation: attribute new colors depending on topdown and
+ // bottom up links to other blocks.
+ void cutHugeBlocks();
+
+ // Put in one group all instructions with no users in this scheduling region
+ // (we'd want these groups be at the end).
+ void regroupNoUserInstructions();
+
+ void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
+
+ void topologicalSort();
+
+ void scheduleInsideBlocks();
+
+ void fillStats();
+};
+
+enum SISchedulerBlockSchedulerVariant {
+ BlockLatencyRegUsage,
+ BlockRegUsageLatency,
+ BlockRegUsage
+};
+
+class SIScheduleBlockScheduler {
+ SIScheduleDAGMI *DAG;
+ SISchedulerBlockSchedulerVariant Variant;
+ std::vector<SIScheduleBlock*> Blocks;
+
+ std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
+ std::set<unsigned> LiveRegs;
+ // Num of schedulable unscheduled blocks reading the register.
+ std::map<unsigned, unsigned> LiveRegsConsumers;
+
+ std::vector<unsigned> LastPosHighLatencyParentScheduled;
+ int LastPosWaitedHighLatency;
+
+ std::vector<SIScheduleBlock*> BlocksScheduled;
+ unsigned NumBlockScheduled;
+ std::vector<SIScheduleBlock*> ReadyBlocks;
+
+ unsigned VregCurrentUsage;
+ unsigned SregCurrentUsage;
+
+ // Currently is only approximation.
+ unsigned maxVregUsage;
+ unsigned maxSregUsage;
+
+ std::vector<unsigned> BlockNumPredsLeft;
+ std::vector<unsigned> BlockNumSuccsLeft;
+
+public:
+ SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
+ SISchedulerBlockSchedulerVariant Variant,
+ SIScheduleBlocks BlocksStruct);
+ ~SIScheduleBlockScheduler() {};
+
+ std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; };
+
+ unsigned getVGPRUsage() { return maxVregUsage; };
+ unsigned getSGPRUsage() { return maxSregUsage; };
+
+private:
+ struct SIBlockSchedCandidate : SISchedulerCandidate {
+ // The best Block candidate.
+ SIScheduleBlock *Block;
+
+ bool IsHighLatency;
+ int VGPRUsageDiff;
+ unsigned NumSuccessors;
+ unsigned NumHighLatencySuccessors;
+ unsigned LastPosHighLatParentScheduled;
+ unsigned Height;
+
+ SIBlockSchedCandidate()
+ : Block(nullptr) {}
+
+ bool isValid() const { return Block; }
+
+ // Copy the status of another candidate without changing policy.
+ void setBest(SIBlockSchedCandidate &Best) {
+ assert(Best.Reason != NoCand && "uninitialized Sched candidate");
+ Block = Best.Block;
+ Reason = Best.Reason;
+ IsHighLatency = Best.IsHighLatency;
+ VGPRUsageDiff = Best.VGPRUsageDiff;
+ NumSuccessors = Best.NumSuccessors;
+ NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
+ LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
+ Height = Best.Height;
+ }
+ };
+
+ bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
+ SIBlockSchedCandidate &TryCand);
+ bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
+ SIBlockSchedCandidate &TryCand);
+ SIScheduleBlock *pickBlock();
+
+ void addLiveRegs(std::set<unsigned> &Regs);
+ void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
+ void releaseBlockSuccs(SIScheduleBlock *Parent);
+ void blockScheduled(SIScheduleBlock *Block);
+
+ // Check register pressure change
+ // by scheduling a block with these LiveIn and LiveOut.
+ std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
+ std::set<unsigned> &OutRegs);
+
+ void schedule();
+};
+
+struct SIScheduleBlockResult {
+ std::vector<unsigned> SUs;
+ unsigned MaxSGPRUsage;
+ unsigned MaxVGPRUsage;
+};
+
+class SIScheduler {
+ SIScheduleDAGMI *DAG;
+ SIScheduleBlockCreator BlockCreator;
+
+public:
+ SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {};
+
+ ~SIScheduler() {};
+
+ struct SIScheduleBlockResult
+ scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
+ SISchedulerBlockSchedulerVariant ScheduleVariant);
+};
+
+class SIScheduleDAGMI : public ScheduleDAGMILive {
+ const SIInstrInfo *SITII;
+ const SIRegisterInfo *SITRI;
+
+ std::vector<SUnit> SUnitsLinksBackup;
+
+ // For moveLowLatencies. After all Scheduling variants are tested.
+ std::vector<unsigned> ScheduledSUnits;
+ std::vector<unsigned> ScheduledSUnitsInv;
+
+ unsigned VGPRSetID;
+ unsigned SGPRSetID;
+
+public:
+ SIScheduleDAGMI(MachineSchedContext *C);
+
+ ~SIScheduleDAGMI() override;
+
+ // Entry point for the schedule.
+ void schedule() override;
+
+ // To init Block's RPTracker.
+ void initRPTracker(RegPressureTracker &RPTracker) {
+ RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
+ }
+
+ MachineBasicBlock *getBB() { return BB; }
+ MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; };
+ MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; };
+ LiveIntervals *getLIS() { return LIS; }
+ MachineRegisterInfo *getMRI() { return &MRI; }
+ const TargetRegisterInfo *getTRI() { return TRI; }
+ SUnit& getEntrySU() { return EntrySU; };
+ SUnit& getExitSU() { return ExitSU; };
+
+ void restoreSULinksLeft();
+
+ template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
+ _Iterator End,
+ unsigned &VgprUsage,
+ unsigned &SgprUsage);
+ std::set<unsigned> getInRegs() {
+ std::set<unsigned> InRegs (RPTracker.getPressure().LiveInRegs.begin(),
+ RPTracker.getPressure().LiveInRegs.end());
+ return InRegs;
+ };
+
+ unsigned getVGPRSetID() const { return VGPRSetID; }
+ unsigned getSGPRSetID() const { return SGPRSetID; }
+
+private:
+ void topologicalSort();
+ // After scheduling is done, improve low latency placements.
+ void moveLowLatencies();
+
+public:
+ // Some stats for scheduling inside blocks.
+ std::vector<unsigned> IsLowLatencySU;
+ std::vector<unsigned> LowLatencyOffset;
+ std::vector<unsigned> IsHighLatencySU;
+ // Topological sort
+ // Maps topological index to the node number.
+ std::vector<int> TopDownIndex2SU;
+ std::vector<int> BottomUpIndex2SU;
+};
+
+} // namespace llvm
+
+#endif /* SIMACHINESCHEDULER_H_ */
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 2afa009..609f5e7 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -23,7 +23,20 @@
using namespace llvm;
-SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {}
+SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {
+ unsigned NumRegPressureSets = getNumRegPressureSets();
+
+ SGPR32SetID = NumRegPressureSets;
+ VGPR32SetID = NumRegPressureSets;
+ for (unsigned i = 0; i < NumRegPressureSets; ++i) {
+ if (strncmp("SGPR_32", getRegPressureSetName(i), 7) == 0)
+ SGPR32SetID = i;
+ else if (strncmp("VGPR_32", getRegPressureSetName(i), 7) == 0)
+ VGPR32SetID = i;
+ }
+ assert(SGPR32SetID < NumRegPressureSets &&
+ VGPR32SetID < NumRegPressureSets);
+}
void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
MCRegAliasIterator R(Reg, this, true);
@@ -36,18 +49,15 @@ unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
const MachineFunction &MF) const {
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
if (ST.hasSGPRInitBug()) {
- unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4;
- if (ST.isXNACKEnabled())
- BaseIdx -= 4;
-
+ // Leave space for flat_scr, xnack_mask, vcc, and alignment
+ unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 8 - 4;
unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- // 98/99 need to be reserved for flat_scr or 96/97 for flat_scr and
- // 98/99 for xnack_mask, and 100/101 for vcc. This is the next sgpr128 down
- // either way.
+ // 96/97 need to be reserved for flat_scr, 98/99 for xnack_mask, and
+ // 100/101 for vcc. This is the next sgpr128 down.
return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95;
}
@@ -58,25 +68,14 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
const MachineFunction &MF) const {
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
if (ST.hasSGPRInitBug()) {
- unsigned Idx;
-
- if (!ST.isXNACKEnabled())
- Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5;
- else
- Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1;
-
+ unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1;
return AMDGPU::SGPR_32RegClass.getRegister(Idx);
}
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- if (!ST.isXNACKEnabled()) {
- // Next register before reservations for flat_scr and vcc.
- return AMDGPU::SGPR97;
- } else {
- // Next register before reservations for flat_scr, xnack_mask, vcc,
- // and scratch resource.
- return AMDGPU::SGPR91;
- }
+ // Next register before reservations for flat_scr, xnack_mask, vcc,
+ // and scratch resource.
+ return AMDGPU::SGPR91;
}
return AMDGPU::SGPR95;
@@ -99,23 +98,22 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
// SI/CI have 104 SGPRs. VI has 102. We need to shift down the reservation
- // for VCC/FLAT_SCR.
+ // for VCC/XNACK_MASK/FLAT_SCR.
+ //
+ // TODO The SGPRs that alias to XNACK_MASK could be used as general purpose
+ // SGPRs when the XNACK feature is not used. This is currently not done
+ // because the code that counts SGPRs cannot account for such holes.
+ reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97);
reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99);
reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101);
-
- if (ST.isXNACKEnabled())
- reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97);
}
// Tonga and Iceland can only allocate a fixed number of SGPRs due
// to a hw bug.
if (ST.hasSGPRInitBug()) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
- // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
- unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
-
- if (ST.isXNACKEnabled())
- Limit -= 2;
+ // Reserve some SGPRs for FLAT_SCRATCH, XNACK_MASK, and VCC (6 SGPRs).
+ unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6;
for (unsigned i = Limit; i < NumSGPRs; ++i) {
unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
@@ -479,12 +477,38 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
if (SubIdx == AMDGPU::NoSubRegister)
return RC;
- // If this register has a sub-register, we can safely assume it is a 32-bit
- // register, because all of SI's sub-registers are 32-bit.
+ // We can assume that each lane corresponds to one 32-bit register.
+ unsigned Count = countPopulation(getSubRegIndexLaneMask(SubIdx));
if (isSGPRClass(RC)) {
- return &AMDGPU::SGPR_32RegClass;
+ switch (Count) {
+ case 1:
+ return &AMDGPU::SGPR_32RegClass;
+ case 2:
+ return &AMDGPU::SReg_64RegClass;
+ case 4:
+ return &AMDGPU::SReg_128RegClass;
+ case 8:
+ return &AMDGPU::SReg_256RegClass;
+ case 16: /* fall-through */
+ default:
+ llvm_unreachable("Invalid sub-register class size");
+ }
} else {
- return &AMDGPU::VGPR_32RegClass;
+ switch (Count) {
+ case 1:
+ return &AMDGPU::VGPR_32RegClass;
+ case 2:
+ return &AMDGPU::VReg_64RegClass;
+ case 3:
+ return &AMDGPU::VReg_96RegClass;
+ case 4:
+ return &AMDGPU::VReg_128RegClass;
+ case 8:
+ return &AMDGPU::VReg_256RegClass;
+ case 16: /* fall-through */
+ default:
+ llvm_unreachable("Invalid sub-register class size");
+ }
}
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 1795237..9410e20 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -25,6 +25,9 @@ namespace llvm {
struct SIRegisterInfo : public AMDGPURegisterInfo {
private:
+ unsigned SGPR32SetID;
+ unsigned VGPR32SetID;
+
void reserveRegisterTuples(BitVector &, unsigned Reg) const;
public:
@@ -146,6 +149,9 @@ public:
unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const;
+ unsigned getSGPR32PressureSet() const { return SGPR32SetID; };
+ unsigned getVGPR32PressureSet() const { return VGPR32SetID; };
+
private:
void buildScratchLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp, unsigned Value,
diff --git a/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp b/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp
index dbdc76b..d36c5d2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp
@@ -98,6 +98,9 @@ void SITypeRewriter::visitCallInst(CallInst &I) {
SmallVector <Type*, 8> Types;
bool NeedToReplace = false;
Function *F = I.getCalledFunction();
+ if (!F)
+ return;
+
std::string Name = F->getName();
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
Value *Arg = I.getArgOperand(i);
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index add415e..3b4c235 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -106,20 +106,27 @@ bool isReadOnlySegment(const GlobalValue *GV) {
return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
}
-static const char ShaderTypeAttribute[] = "ShaderType";
-
-unsigned getShaderType(const Function &F) {
- Attribute A = F.getFnAttribute(ShaderTypeAttribute);
- unsigned ShaderType = ShaderType::COMPUTE;
+static unsigned getIntegerAttribute(const Function &F, const char *Name,
+ unsigned Default) {
+ Attribute A = F.getFnAttribute(Name);
+ unsigned Result = Default;
if (A.isStringAttribute()) {
StringRef Str = A.getValueAsString();
- if (Str.getAsInteger(0, ShaderType)) {
+ if (Str.getAsInteger(0, Result)) {
LLVMContext &Ctx = F.getContext();
Ctx.emitError("can't parse shader type");
}
}
- return ShaderType;
+ return Result;
+}
+
+unsigned getShaderType(const Function &F) {
+ return getIntegerAttribute(F, "ShaderType", ShaderType::COMPUTE);
+}
+
+unsigned getInitialPSInputAddr(const Function &F) {
+ return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}
bool isSI(const MCSubtargetInfo &STI) {
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 19419a2..57cbe1b5 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -45,6 +45,8 @@ bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);
unsigned getShaderType(const Function &F);
+unsigned getInitialPSInputAddr(const Function &F);
+
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 419717c..a520770 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -87,9 +87,22 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
}
+ if (STI.isTargetDarwin() && F->getCallingConv() == CallingConv::CXX_FAST_TLS)
+ return MF->getInfo<ARMFunctionInfo>()->isSplitCSR()
+ ? CSR_iOS_CXX_TLS_PE_SaveList
+ : CSR_iOS_CXX_TLS_SaveList;
return RegList;
}
+const MCPhysReg *ARMBaseRegisterInfo::getCalleeSavedRegsViaCopy(
+ const MachineFunction *MF) const {
+ assert(MF && "Invalid MachineFunction pointer.");
+ if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+ MF->getInfo<ARMFunctionInfo>()->isSplitCSR())
+ return CSR_iOS_CXX_TLS_ViaCopy_SaveList;
+ return nullptr;
+}
+
const uint32_t *
ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
@@ -97,6 +110,8 @@ ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
if (CC == CallingConv::GHC)
// This is academic becase all GHC calls are (supposed to be) tail calls
return CSR_NoRegs_RegMask;
+ if (STI.isTargetDarwin() && CC == CallingConv::CXX_FAST_TLS)
+ return CSR_iOS_CXX_TLS_RegMask;
return STI.isTargetDarwin() ? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
}
@@ -106,6 +121,14 @@ ARMBaseRegisterInfo::getNoPreservedMask() const {
}
const uint32_t *
+ARMBaseRegisterInfo::getTLSCallPreservedMask(const MachineFunction &MF) const {
+ assert(MF.getSubtarget<ARMSubtarget>().isTargetDarwin() &&
+ "only know about special TLS call on Darwin");
+ return CSR_iOS_TLSCall_RegMask;
+}
+
+
+const uint32_t *
ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index cea8b80..6a9a45a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -62,6 +62,12 @@ static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
switch (Reg) {
case D15: case D14: case D13: case D12:
case D11: case D10: case D9: case D8:
+ case D7: case D6: case D5: case D4:
+ case D3: case D2: case D1: case D0:
+ case D31: case D30: case D29: case D28:
+ case D27: case D26: case D25: case D24:
+ case D23: case D22: case D21: case D20:
+ case D19: case D18: case D17: case D16:
return true;
default:
return false;
@@ -92,9 +98,12 @@ protected:
public:
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const MCPhysReg *
+ getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const override;
+ const uint32_t *getTLSCallPreservedMask(const MachineFunction &MF) const;
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
/// case that 'returned' is on an i32 first argument if the calling convention
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
index 2335164..847ef87 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -225,6 +225,21 @@ def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
(sub CSR_AAPCS_ThisReturn, R9))>;
+def CSR_iOS_TLSCall : CalleeSavedRegs<(add LR, SP,
+ (sequence "R%u", 12, 1),
+ (sequence "D%u", 31, 0))>;
+
+// C++ TLS access function saves all registers except SP. Try to match
+// the order of CSRs in CSR_iOS.
+def CSR_iOS_CXX_TLS : CalleeSavedRegs<(add CSR_iOS, (sequence "R%u", 12, 1),
+ (sequence "D%u", 31, 0))>;
+
+// CSRs that are handled by prologue, epilogue.
+def CSR_iOS_CXX_TLS_PE : CalleeSavedRegs<(add LR)>;
+
+// CSRs that are handled explicitly via copies.
+def CSR_iOS_CXX_TLS_ViaCopy : CalleeSavedRegs<(sub CSR_iOS_CXX_TLS, LR)>;
+
// The "interrupt" attribute is used to generate code that is acceptable in
// exception-handlers of various kinds. It makes us use a different return
// instruction (handled elsewhere) and affects which registers we must return to
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index 9bdf823c..ff2fcfa 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -578,7 +578,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
// For now 32-bit only.
- if (VT != MVT::i32) return 0;
+ if (VT != MVT::i32 || GV->isThreadLocal()) return 0;
Reloc::Model RelocM = TM.getRelocationModel();
bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM);
@@ -2083,6 +2083,9 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
+ if (TLI.supportSplitCSR(FuncInfo.MF))
+ return false;
+
// Build a list of return value registers.
SmallVector<unsigned, 4> RetRegs;
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 0242440..dfbb969 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -622,7 +622,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
}
if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
} else
Base = N;
@@ -801,7 +802,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
} else if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
}
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -1067,7 +1069,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
} else if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
}
Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
@@ -1186,7 +1189,8 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
if (N.getOpcode() == ISD::ADD) {
return false; // We want to select register offset instead
} else if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
} else {
Base = N;
@@ -1292,7 +1296,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
}
if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::TargetConstantPool)
return false; // We want to select t2LDRpci instead.
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 9cfb06b..37c0795 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -744,7 +744,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SUBE, MVT::i32, Custom);
}
- if (!Subtarget->isThumb1Only())
+ if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
// ARM does not have ROTL.
@@ -1385,6 +1385,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
else
return CallingConv::ARM_AAPCS;
case CallingConv::Fast:
+ case CallingConv::CXX_FAST_TLS:
if (!Subtarget->isAAPCS_ABI()) {
if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
return CallingConv::Fast;
@@ -2347,6 +2348,19 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const MCPhysReg *I =
+ TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+ if (I) {
+ for (; *I; ++I) {
+ if (ARM::GPRRegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::i32));
+ else if (ARM::DPRRegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+ }
+ }
// Update chain and glue.
RetOps[0] = Chain;
@@ -2530,6 +2544,72 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}
+/// \brief Convert a TLS address reference into the correct sequence of loads
+/// and calls to compute the variable's address for Darwin, and return an
+/// SDValue containing the final node.
+
+/// Darwin only has one TLS scheme which must be capable of dealing with the
+/// fully general situation, in the worst case. This means:
+/// + "extern __thread" declaration.
+/// + Defined in a possibly unknown dynamic library.
+///
+/// The general system is that each __thread variable has a [3 x i32] descriptor
+/// which contains information used by the runtime to calculate the address. The
+/// only part of this the compiler needs to know about is the first word, which
+/// contains a function pointer that must be called with the address of the
+/// entire descriptor in "r0".
+///
+/// Since this descriptor may be in a different unit, in general access must
+/// proceed along the usual ARM rules. A common sequence to produce is:
+///
+/// movw rT1, :lower16:_var$non_lazy_ptr
+/// movt rT1, :upper16:_var$non_lazy_ptr
+/// ldr r0, [rT1]
+/// ldr rT2, [r0]
+/// blx rT2
+/// [...address now in r0...]
+SDValue
+ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
+ SDLoc DL(Op);
+
+ // First step is to get the address of the actua global symbol. This is where
+ // the TLS descriptor lives.
+ SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
+
+ // The first entry in the descriptor is a function pointer that we must call
+ // to obtain the address of the variable.
+ SDValue Chain = DAG.getEntryNode();
+ SDValue FuncTLVGet =
+ DAG.getLoad(MVT::i32, DL, Chain, DescAddr,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, true, true, 4);
+ Chain = FuncTLVGet.getValue(1);
+
+ MachineFunction &F = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = F.getFrameInfo();
+ MFI->setAdjustsStack(true);
+
+ // TLS calls preserve all registers except those that absolutely must be
+ // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
+ // silly).
+ auto TRI =
+ getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
+ auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
+ const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
+
+ // Finally, we can make the call. This is just a degenerate version of a
+ // normal AArch64 call node: r0 takes the address of the descriptor, and
+ // returns the address of the variable in this thread.
+ Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
+ Chain =
+ DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
+ Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
+ DAG.getRegisterMask(Mask), Chain.getValue(1));
+ return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
+}
+
// Lower ISD::GlobalTLSAddress using the "general dynamic" model
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
@@ -2631,9 +2711,11 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->isTargetDarwin())
+ return LowerGlobalTLSAddressDarwin(Op, DAG);
+
// TODO: implement the "local dynamic" model
- assert(Subtarget->isTargetELF() &&
- "TLS not implemented for non-ELF targets");
+ assert(Subtarget->isTargetELF() && "Only ELF implemented here");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
if (DAG.getTarget().Options.EmulatedTLS)
return LowerToTLSEmulatedModel(GA, DAG);
@@ -11407,7 +11489,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return;
case 'J':
- if (Subtarget->isThumb()) { // FIXME thumb2
+ if (Subtarget->isThumb1Only()) {
// This must be a constant between -255 and -1, for negated ADD
// immediates. This can be used in GCC with an "n" modifier that
// prints the negated value, for use with SUB instructions. It is
@@ -11476,7 +11558,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return;
case 'M':
- if (Subtarget->isThumb()) { // FIXME thumb2
+ if (Subtarget->isThumb1Only()) {
// This must be a multiple of 4 between 0 and 1020, for
// ADD sp + immediate.
if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
@@ -12324,3 +12406,49 @@ unsigned ARMTargetLowering::getExceptionSelectorRegister(
// via the personality function.
return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1;
}
+
+void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
+ // Update IsSplitCSR in ARMFunctionInfo.
+ ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
+ AFI->setIsSplitCSR(true);
+}
+
+void ARMTargetLowering::insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
+ const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
+ if (!IStart)
+ return;
+
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+ for (const MCPhysReg *I = IStart; *I; ++I) {
+ const TargetRegisterClass *RC = nullptr;
+ if (ARM::GPRRegClass.contains(*I))
+ RC = &ARM::GPRRegClass;
+ else if (ARM::DPRRegClass.contains(*I))
+ RC = &ARM::DPRRegClass;
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ // Create copy from CSR to a virtual register.
+ // FIXME: this currently does not emit CFI pseudo-instructions, it works
+ // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
+ // nounwind. If we want to generalize this later, we may need to emit
+ // CFI pseudo-instructions.
+ assert(Entry->getParent()->getFunction()->hasFnAttribute(
+ Attribute::NoUnwind) &&
+ "Function should be nounwind in insertCopiesSplitCSR!");
+ Entry->addLiveIn(*I);
+ BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+ NewVR)
+ .addReg(*I);
+
+ for (auto *Exit : Exits)
+ BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+ *I)
+ .addReg(NewVR);
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index b764624..96b56c3 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -526,6 +526,8 @@ namespace llvm {
SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
SelectionDAG &DAG,
TLSModel::Model model) const;
+ SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -578,6 +580,15 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals,
bool isThisReturn, SDValue ThisVal) const;
+ bool supportSplitCSR(MachineFunction *MF) const override {
+ return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+ MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
+ }
+ void initializeSplitCSR(MachineBasicBlock *Entry) const override;
+ void insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
+
SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index b9de83b..c446ba3 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -5398,6 +5398,27 @@ def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
Requires<[IsARM, UseMovt]>;
} // isReMaterializable
+// The many different faces of TLS access.
+def : ARMPat<(ARMWrapper tglobaltlsaddr :$dst),
+ (MOVi32imm tglobaltlsaddr :$dst)>,
+ Requires<[IsARM, UseMovt]>;
+
+def : Pat<(ARMWrapper tglobaltlsaddr:$src),
+ (LDRLIT_ga_abs tglobaltlsaddr:$src)>,
+ Requires<[IsARM, DontUseMovt]>;
+
+def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
+ (MOV_ga_pcrel tglobaltlsaddr:$addr)>, Requires<[IsARM, UseMovt]>;
+
+def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
+ (LDRLIT_ga_pcrel tglobaltlsaddr:$addr)>,
+ Requires<[IsARM, DontUseMovt]>;
+let AddedComplexity = 10 in
+def : Pat<(load (ARMWrapperPIC tglobaltlsaddr:$addr)),
+ (MOV_ga_pcrel_ldr tglobaltlsaddr:$addr)>,
+ Requires<[IsARM, UseMovt]>;
+
+
// ConstantPool, GlobalAddress, and JumpTable
def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index 7020ffb..defef4e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -5689,7 +5689,10 @@ def : NEONInstAlias<"vmov${p} $Vd, $Vm",
// VMOV : Vector Move (Immediate)
-let isReMaterializable = 1 in {
+// Although VMOVs are not strictly speaking cheap, they are as expensive
+// as their copies counterpart (VORR), so we should prefer rematerialization
+// over splitting when it applies.
+let isReMaterializable = 1, isAsCheapAsAMove=1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
(ins nImmSplatI8:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$Vd, $SIMM", "",
@@ -5744,7 +5747,7 @@ def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
(ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
"vmov", "f32", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
-} // isReMaterializable
+} // isReMaterializable, isAsCheapAsAMove
// Add support for bytes replication feature, so it could be GAS compatible.
// E.g. instructions below:
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
index df6f243..5b1f9a0 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -1366,6 +1366,14 @@ def tLDRLIT_ga_abs : PseudoInst<(outs tGPR:$dst), (ins i32imm:$src),
(ARMWrapper tglobaladdr:$src))]>,
Requires<[IsThumb, DontUseMovt]>;
+// TLS globals
+def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
+ (tLDRLIT_ga_pcrel tglobaltlsaddr:$addr)>,
+ Requires<[IsThumb, DontUseMovt]>;
+def : Pat<(ARMWrapper tglobaltlsaddr:$addr),
+ (tLDRLIT_ga_abs tglobaltlsaddr:$addr)>,
+ Requires<[IsThumb, DontUseMovt]>;
+
// JumpTable
def : T1Pat<(ARMWrapperJT tjumptable:$dst),
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index d460d33..f42f456 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -3875,6 +3875,13 @@ def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
}
+def : T2Pat<(ARMWrapperPIC tglobaltlsaddr :$dst),
+ (t2MOV_ga_pcrel tglobaltlsaddr:$dst)>,
+ Requires<[IsThumb2, UseMovt]>;
+def : T2Pat<(ARMWrapper tglobaltlsaddr:$dst),
+ (t2MOVi32imm tglobaltlsaddr:$dst)>,
+ Requires<[IsThumb2, UseMovt]>;
+
// ConstantPool, GlobalAddress, and JumpTable
def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>;
def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>,
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
index 050cd1a..63e7940 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -930,10 +930,10 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
// and could enable the conversion to float to be removed completely.
def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
(VMOVDRR GPR:$Rl, (BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
- Requires<[IsARM]>;
+ Requires<[IsARM, HasV6T2]>;
def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
(VMOVDRR GPR:$Rl, (t2BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
- Requires<[IsThumb2]>;
+ Requires<[IsThumb2, HasV6T2]>;
def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)),
(VMOVDRR GPR:$Rl, (EORri GPR:$Rh, (i32 0x80000000)))>,
Requires<[IsARM]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index ac0330f..71ad7a4 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -20,4 +20,5 @@ ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
RestoreSPFromFP(false), LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
- PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false) {}
+ PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false),
+ IsSplitCSR(false) {}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index d644797..68f9aec 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -118,6 +118,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// coalesced weights.
DenseMap<const MachineBasicBlock*, unsigned> CoalescedWeights;
+ /// True if this function has a subset of CSRs that is handled explicitly via
+ /// copies.
+ bool IsSplitCSR;
+
public:
ARMFunctionInfo() :
isThumb(false),
@@ -128,7 +132,7 @@ public:
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0),
NumAlignedDPRCS2Regs(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false) {}
+ VarArgsFrameIndex(0), HasITBlocks(false), IsSplitCSR(false) {}
explicit ARMFunctionInfo(MachineFunction &MF);
@@ -199,6 +203,9 @@ public:
bool hasITBlocks() const { return HasITBlocks; }
void setHasITBlocks(bool h) { HasITBlocks = h; }
+ bool isSplitCSR() const { return IsSplitCSR; }
+ void setIsSplitCSR(bool s) { IsSplitCSR = s; }
+
void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
llvm_unreachable("Duplicate entries!");
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 6084f22..57577dc 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -388,6 +388,9 @@ private:
size_t calculateContentSize() const;
+ // Reset state between object emissions
+ void reset() override;
+
public:
ARMTargetELFStreamer(MCStreamer &S)
: ARMTargetStreamer(S), CurrentVendor("aeabi"), FPU(ARM::FK_INVALID),
@@ -415,7 +418,7 @@ public:
MCCodeEmitter *Emitter, bool IsThumb)
: MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb),
MappingSymbolCounter(0), LastEMS(EMS_None) {
- Reset();
+ EHReset();
}
~ARMELFStreamer() {}
@@ -579,7 +582,10 @@ private:
}
// Helper functions for ARM exception handling directives
- void Reset();
+ void EHReset();
+
+ // Reset state between object emissions
+ void reset() override;
void EmitPersonalityFixup(StringRef Name);
void FlushPendingOffset();
@@ -1040,6 +1046,8 @@ void ARMTargetELFStreamer::emitInst(uint32_t Inst, char Suffix) {
getStreamer().emitInst(Inst, Suffix);
}
+void ARMTargetELFStreamer::reset() { AttributeSection = nullptr; }
+
void ARMELFStreamer::FinishImpl() {
MCTargetStreamer &TS = *getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
@@ -1048,6 +1056,18 @@ void ARMELFStreamer::FinishImpl() {
MCELFStreamer::FinishImpl();
}
+void ARMELFStreamer::reset() {
+ MCTargetStreamer &TS = *getTargetStreamer();
+ ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
+ ATS.reset();
+ MappingSymbolCounter = 0;
+ MCELFStreamer::reset();
+ // MCELFStreamer clear's the assembler's e_flags. However, for
+ // arm we manually set the ABI version on streamer creation, so
+ // do the same here
+ getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
+}
+
inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
unsigned Type,
unsigned Flags,
@@ -1094,7 +1114,7 @@ void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) {
Kind));
}
-void ARMELFStreamer::Reset() {
+void ARMELFStreamer::EHReset() {
ExTab = nullptr;
FnStart = nullptr;
Personality = nullptr;
@@ -1164,7 +1184,7 @@ void ARMELFStreamer::emitFnEnd() {
SwitchSection(&FnStart->getSection());
// Clean exception handling frame information
- Reset();
+ EHReset();
}
void ARMELFStreamer::emitCantUnwind() { CantUnwind = true; }
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index dad50f2..c0d10c8 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -38,6 +38,9 @@ void ARMTargetStreamer::emitCurrentConstantPool() {
// finish() - write out any non-empty assembler constant pools.
void ARMTargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
+// reset() - Reset any state
+void ARMTargetStreamer::reset() {}
+
// The remaining callbacks should be handled separately by each
// streamer.
void ARMTargetStreamer::emitFnStart() {}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
index 812f983..27faac6 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
@@ -53,6 +53,11 @@ public:
/// \p MBB will be correctly handled by the target.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
+ /// Disable shrink wrap as tBfar/BL will be used to adjust for long jumps.
+ bool enableShrinkWrapping(const MachineFunction &MF) const override {
+ return false;
+ }
+
private:
/// Check if the frame lowering of \p MF needs a special fixup
/// code sequence for the epilogue.
diff --git a/contrib/llvm/lib/Target/AVR/AVR.h b/contrib/llvm/lib/Target/AVR/AVR.h
new file mode 100644
index 0000000..4c1667e
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVR.h
@@ -0,0 +1,54 @@
+//===-- AVR.h - Top-level interface for AVR representation ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// AVR back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_H
+#define LLVM_AVR_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+
+namespace llvm {
+
+class AVRTargetMachine;
+class FunctionPass;
+
+FunctionPass *createAVRISelDag(AVRTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+FunctionPass *createAVRExpandPseudoPass();
+FunctionPass *createAVRFrameAnalyzerPass();
+FunctionPass *createAVRDynAllocaSRPass();
+FunctionPass *createAVRBranchSelectionPass();
+
+/**
+ * Contains the AVR backend.
+ */
+namespace AVR {
+
+enum AddressSpace { DataMemory, ProgramMemory };
+
+template <typename T> bool isProgramMemoryAddress(T *V) {
+ return cast<PointerType>(V->getType())->getAddressSpace() == ProgramMemory;
+}
+
+inline bool isProgramMemoryAccess(MemSDNode const *N) {
+ auto V = N->getMemOperand()->getValue();
+
+ return (V != nullptr) ? isProgramMemoryAddress(V) : false;
+}
+
+} // end of namespace AVR
+
+} // end namespace llvm
+
+#endif // LLVM_AVR_H
diff --git a/contrib/llvm/lib/Target/AVR/AVRSelectionDAGInfo.h b/contrib/llvm/lib/Target/AVR/AVRSelectionDAGInfo.h
new file mode 100644
index 0000000..ee832ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRSelectionDAGInfo.h
@@ -0,0 +1,29 @@
+//===-- AVRSelectionDAGInfo.h - AVR SelectionDAG Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AVR subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_SELECTION_DAG_INFO_H
+#define LLVM_AVR_SELECTION_DAG_INFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+/**
+ * Holds information about the AVR instruction selection DAG.
+ */
+class AVRSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+};
+
+} // end namespace llvm
+
+#endif // LLVM_AVR_SELECTION_DAG_INFO_H
diff --git a/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp b/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp
new file mode 100644
index 0000000..85f03e8
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp
@@ -0,0 +1,40 @@
+//===-- AVRTargetObjectFile.cpp - AVR Object Files ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AVRTargetObjectFile.h"
+
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/ELF.h"
+
+#include "AVR.h"
+
+namespace llvm {
+void AVRTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) {
+ Base::Initialize(Ctx, TM);
+ ProgmemDataSection =
+ Ctx.getELFSection(".progmem.data", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+}
+
+MCSection *
+AVRTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
+ // Global values in flash memory are placed in the progmem.data section
+ // unless they already have a user assigned section.
+ if (AVR::isProgramMemoryAddress(GV) && !GV->hasSection())
+ return ProgmemDataSection;
+
+ // Otherwise, we work the same way as ELF.
+ return Base::SelectSectionForGlobal(GV, Kind, Mang, TM);
+}
+} // end of namespace llvm
diff --git a/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.h b/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.h
new file mode 100644
index 0000000..bdda35b
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.h
@@ -0,0 +1,35 @@
+//===-- AVRTargetObjectFile.h - AVR Object Info -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_TARGET_OBJECT_FILE_H
+#define LLVM_AVR_TARGET_OBJECT_FILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+/**
+ * Lowering for an AVR ELF32 object file.
+ */
+class AVRTargetObjectFile : public TargetLoweringObjectFileELF {
+ typedef TargetLoweringObjectFileELF Base;
+
+public:
+ void Initialize(MCContext &ctx, const TargetMachine &TM) override;
+
+ MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override;
+
+private:
+ MCSection *ProgmemDataSection;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_AVR_TARGET_OBJECT_FILE_H
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index e213089..4c7c039 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -190,9 +190,9 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
-MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
- MCStreamer &OutStreamer,
- const MCOperand &Imm, int AlignSize) {
+static MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
+ MCStreamer &OutStreamer, const MCOperand &Imm,
+ int AlignSize) {
MCSymbol *Sym;
int64_t Value;
if (Imm.getExpr()->evaluateAsAbsolute(Value)) {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 77907b0..4d2b545 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -1275,6 +1275,8 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
if (!BT.has(RD.Reg))
continue;
const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
+ auto At = MI->isPHI() ? B.getFirstNonPHI()
+ : MachineBasicBlock::iterator(MI);
// Find a source operand that is equal to the result.
for (auto &Op : MI->uses()) {
@@ -1298,7 +1300,7 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
DebugLoc DL = MI->getDebugLoc();
const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
unsigned NewR = MRI.createVirtualRegister(FRC);
- BuildMI(B, I, DL, HII.get(TargetOpcode::COPY), NewR)
+ BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
.addReg(RS.Reg, 0, RS.Sub);
HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
BT.put(BitTracker::RegisterRef(NewR), SC);
@@ -1925,7 +1927,9 @@ bool BitSimplification::genPackhl(MachineInstr *MI,
MachineBasicBlock &B = *MI->getParent();
unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
DebugLoc DL = MI->getDebugLoc();
- BuildMI(B, MI, DL, HII.get(Hexagon::S2_packhl), NewR)
+ auto At = MI->isPHI() ? B.getFirstNonPHI()
+ : MachineBasicBlock::iterator(MI);
+ BuildMI(B, At, DL, HII.get(Hexagon::S2_packhl), NewR)
.addReg(Rs.Reg, 0, Rs.Sub)
.addReg(Rt.Reg, 0, Rt.Sub);
HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
@@ -1950,9 +1954,11 @@ bool BitSimplification::genExtractHalf(MachineInstr *MI,
// Prefer zxth, since zxth can go in any slot, while extractu only in
// slots 2 and 3.
unsigned NewR = 0;
+ auto At = MI->isPHI() ? B.getFirstNonPHI()
+ : MachineBasicBlock::iterator(MI);
if (L.Low && Opc != Hexagon::A2_zxth) {
NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
- BuildMI(B, MI, DL, HII.get(Hexagon::A2_zxth), NewR)
+ BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR)
.addReg(L.Reg, 0, L.Sub);
} else if (!L.Low && Opc != Hexagon::S2_extractu) {
NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
@@ -1989,7 +1995,9 @@ bool BitSimplification::genCombineHalf(MachineInstr *MI,
MachineBasicBlock &B = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
- BuildMI(B, MI, DL, HII.get(COpc), NewR)
+ auto At = MI->isPHI() ? B.getFirstNonPHI()
+ : MachineBasicBlock::iterator(MI);
+ BuildMI(B, At, DL, HII.get(COpc), NewR)
.addReg(H.Reg, 0, H.Sub)
.addReg(L.Reg, 0, L.Sub);
HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
@@ -2043,7 +2051,9 @@ bool BitSimplification::genExtractLow(MachineInstr *MI,
continue;
unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
- auto MIB = BuildMI(B, MI, DL, HII.get(NewOpc), NewR)
+ auto At = MI->isPHI() ? B.getFirstNonPHI()
+ : MachineBasicBlock::iterator(MI);
+ auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR)
.addReg(RS.Reg, 0, RS.Sub);
if (NewOpc == Hexagon::A2_andir)
MIB.addImm((1 << W) - 1);
@@ -2076,6 +2086,8 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI,
if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI))
return false;
MachineBasicBlock &B = *MI->getParent();
+ auto At = MI->isPHI() ? B.getFirstNonPHI()
+ : MachineBasicBlock::iterator(MI);
const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
const BitTracker::BitValue &V = SC[F+BN];
@@ -2098,7 +2110,7 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI,
}
if (P != UINT_MAX) {
unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
- BuildMI(B, MI, DL, HII.get(Hexagon::S2_tstbit_i), NewR)
+ BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR)
.addReg(RR.Reg, 0, RR.Sub)
.addImm(P);
HBS::replaceReg(RD.Reg, NewR, MRI);
@@ -2108,7 +2120,7 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI,
} else if (V.is(0) || V.is(1)) {
unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
unsigned NewOpc = V.is(0) ? Hexagon::TFR_PdFalse : Hexagon::TFR_PdTrue;
- BuildMI(B, MI, DL, HII.get(NewOpc), NewR);
+ BuildMI(B, At, DL, HII.get(NewOpc), NewR);
HBS::replaceReg(RD.Reg, NewR, MRI);
return true;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
index 87d6b35..37c2042 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -3320,6 +3320,7 @@ class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
/* u16_0Imm */ addr{15-0})));
// Store upper-half and store doubleword cannot be NV.
let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
+ let Uses = !if (isAbs, [], [GP]);
let IClass = 0b0100;
let Inst{27} = 1;
@@ -3425,6 +3426,7 @@ class T_StoreAbsGP_NV <string mnemonic, Operand ImmOp, bits<2>MajOp, bit isAbs>
!if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2},
!if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1},
/* u16_0Imm */ addr{15-0})));
+ let Uses = !if (isAbs, [], [GP]);
let IClass = 0b0100;
let Inst{27} = 1;
@@ -3736,7 +3738,7 @@ defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
// if ([!]Pv[.new]) Rx=mem[bhwd](##global)
//===----------------------------------------------------------------------===//
-let isAsmParserOnly = 1 in
+let isAsmParserOnly = 1, Uses = [GP] in
class T_LoadGP <string mnemonic, string BaseOp, RegisterClass RC, Operand ImmOp,
bits<3> MajOp>
: T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp>, PredNewRel {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRDF.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRDF.cpp
new file mode 100644
index 0000000..06719cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRDF.cpp
@@ -0,0 +1,60 @@
+//===--- HexagonRDF.cpp ---------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonRDF.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
+
+#include "llvm/CodeGen/MachineInstr.h"
+
+using namespace llvm;
+using namespace rdf;
+
+bool HexagonRegisterAliasInfo::covers(RegisterRef RA, RegisterRef RB) const {
+ if (RA == RB)
+ return true;
+
+ if (TargetRegisterInfo::isVirtualRegister(RA.Reg) &&
+ TargetRegisterInfo::isVirtualRegister(RB.Reg)) {
+ // Hexagon-specific cases.
+ if (RA.Reg == RB.Reg) {
+ if (RA.Sub == 0)
+ return true;
+ if (RB.Sub == 0)
+ return false;
+ }
+ }
+
+ return RegisterAliasInfo::covers(RA, RB);
+}
+
+bool HexagonRegisterAliasInfo::covers(const RegisterSet &RRs, RegisterRef RR)
+ const {
+ if (RRs.count(RR))
+ return true;
+
+ if (!TargetRegisterInfo::isPhysicalRegister(RR.Reg)) {
+ assert(TargetRegisterInfo::isVirtualRegister(RR.Reg));
+ // Check if both covering subregisters are present.
+ bool HasLo = RRs.count({RR.Reg, Hexagon::subreg_loreg});
+ bool HasHi = RRs.count({RR.Reg, Hexagon::subreg_hireg});
+ if (HasLo && HasHi)
+ return true;
+ }
+
+ if (RR.Sub == 0) {
+ // Check if both covering subregisters are present.
+ unsigned Lo = TRI.getSubReg(RR.Reg, Hexagon::subreg_loreg);
+ unsigned Hi = TRI.getSubReg(RR.Reg, Hexagon::subreg_hireg);
+ if (RRs.count({Lo, 0}) && RRs.count({Hi, 0}))
+ return true;
+ }
+
+ return RegisterAliasInfo::covers(RRs, RR);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRDF.h b/contrib/llvm/lib/Target/Hexagon/HexagonRDF.h
new file mode 100644
index 0000000..00c1889
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRDF.h
@@ -0,0 +1,28 @@
+//===--- HexagonRDF.h -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGON_RDF_H
+#define HEXAGON_RDF_H
+#include "RDFGraph.h"
+
+namespace llvm {
+ class TargetRegisterInfo;
+}
+
+namespace rdf {
+ struct HexagonRegisterAliasInfo : public RegisterAliasInfo {
+ HexagonRegisterAliasInfo(const TargetRegisterInfo &TRI)
+ : RegisterAliasInfo(TRI) {}
+ bool covers(RegisterRef RA, RegisterRef RR) const override;
+ bool covers(const RegisterSet &RRs, RegisterRef RR) const override;
+ };
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
new file mode 100644
index 0000000..3fcda984
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
@@ -0,0 +1,272 @@
+//===--- HexagonRDFOpt.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonInstrInfo.h"
+#include "HexagonRDF.h"
+#include "HexagonSubtarget.h"
+#include "RDFCopy.h"
+#include "RDFDeadCode.h"
+#include "RDFGraph.h"
+#include "RDFLiveness.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominanceFrontier.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+using namespace rdf;
+
+namespace llvm {
+ void initializeHexagonRDFOptPass(PassRegistry&);
+ FunctionPass *createHexagonRDFOpt();
+}
+
+namespace {
+ cl::opt<unsigned> RDFLimit("rdf-limit", cl::init(UINT_MAX));
+ unsigned RDFCount = 0;
+ cl::opt<bool> RDFDump("rdf-dump", cl::init(false));
+
+ class HexagonRDFOpt : public MachineFunctionPass {
+ public:
+ HexagonRDFOpt() : MachineFunctionPass(ID) {
+ initializeHexagonRDFOptPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineDominanceFrontier>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ const char *getPassName() const override {
+ return "Hexagon RDF optimizations";
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static char ID;
+
+ private:
+ MachineDominatorTree *MDT;
+ MachineRegisterInfo *MRI;
+ };
+
+ char HexagonRDFOpt::ID = 0;
+}
+
+INITIALIZE_PASS_BEGIN(HexagonRDFOpt, "rdfopt", "Hexagon RDF opt", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
+INITIALIZE_PASS_END(HexagonRDFOpt, "rdfopt", "Hexagon RDF opt", false, false)
+
+
+struct HexagonDCE : public DeadCodeElimination {
+ HexagonDCE(DataFlowGraph &G, MachineRegisterInfo &MRI)
+ : DeadCodeElimination(G, MRI) {}
+ bool rewrite(NodeAddr<InstrNode*> IA, SetVector<NodeId> &Remove);
+ void removeOperand(NodeAddr<InstrNode*> IA, unsigned OpNum);
+
+ bool run();
+};
+
+
+bool HexagonDCE::run() {
+ bool Collected = collect();
+ if (!Collected)
+ return false;
+
+ const SetVector<NodeId> &DeadNodes = getDeadNodes();
+ const SetVector<NodeId> &DeadInstrs = getDeadInstrs();
+
+ typedef DenseMap<NodeId,NodeId> RefToInstrMap;
+ RefToInstrMap R2I;
+ SetVector<NodeId> PartlyDead;
+ DataFlowGraph &DFG = getDFG();
+
+ for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) {
+ for (auto TA : BA.Addr->members_if(DFG.IsCode<NodeAttrs::Stmt>, DFG)) {
+ NodeAddr<StmtNode*> SA = TA;
+ for (NodeAddr<RefNode*> RA : SA.Addr->members(DFG)) {
+ R2I.insert(std::make_pair(RA.Id, SA.Id));
+ if (DFG.IsDef(RA) && DeadNodes.count(RA.Id))
+ if (!DeadInstrs.count(SA.Id))
+ PartlyDead.insert(SA.Id);
+ }
+ }
+ }
+
+ // Nodes to remove.
+ SetVector<NodeId> Remove = DeadInstrs;
+
+ bool Changed = false;
+ for (NodeId N : PartlyDead) {
+ auto SA = DFG.addr<StmtNode*>(N);
+ if (trace())
+ dbgs() << "Partly dead: " << *SA.Addr->getCode();
+ Changed |= rewrite(SA, Remove);
+ }
+
+ return erase(Remove) || Changed;
+}
+
+
+void HexagonDCE::removeOperand(NodeAddr<InstrNode*> IA, unsigned OpNum) {
+ MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
+
+ auto getOpNum = [MI] (MachineOperand &Op) -> unsigned {
+ for (unsigned i = 0, n = MI->getNumOperands(); i != n; ++i)
+ if (&MI->getOperand(i) == &Op)
+ return i;
+ llvm_unreachable("Invalid operand");
+ };
+ DenseMap<NodeId,unsigned> OpMap;
+ NodeList Refs = IA.Addr->members(getDFG());
+ for (NodeAddr<RefNode*> RA : Refs)
+ OpMap.insert(std::make_pair(RA.Id, getOpNum(RA.Addr->getOp())));
+
+ MI->RemoveOperand(OpNum);
+
+ for (NodeAddr<RefNode*> RA : Refs) {
+ unsigned N = OpMap[RA.Id];
+ if (N < OpNum)
+ RA.Addr->setRegRef(&MI->getOperand(N));
+ else if (N > OpNum)
+ RA.Addr->setRegRef(&MI->getOperand(N-1));
+ }
+}
+
+
+bool HexagonDCE::rewrite(NodeAddr<InstrNode*> IA, SetVector<NodeId> &Remove) {
+ if (!getDFG().IsCode<NodeAttrs::Stmt>(IA))
+ return false;
+ DataFlowGraph &DFG = getDFG();
+ MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
+ auto &HII = static_cast<const HexagonInstrInfo&>(DFG.getTII());
+ if (HII.getAddrMode(MI) != HexagonII::PostInc)
+ return false;
+ unsigned Opc = MI->getOpcode();
+ unsigned OpNum, NewOpc;
+ switch (Opc) {
+ case Hexagon::L2_loadri_pi:
+ NewOpc = Hexagon::L2_loadri_io;
+ OpNum = 1;
+ break;
+ case Hexagon::L2_loadrd_pi:
+ NewOpc = Hexagon::L2_loadrd_io;
+ OpNum = 1;
+ break;
+ case Hexagon::V6_vL32b_pi:
+ NewOpc = Hexagon::V6_vL32b_ai;
+ OpNum = 1;
+ break;
+ case Hexagon::S2_storeri_pi:
+ NewOpc = Hexagon::S2_storeri_io;
+ OpNum = 0;
+ break;
+ case Hexagon::S2_storerd_pi:
+ NewOpc = Hexagon::S2_storerd_io;
+ OpNum = 0;
+ break;
+ case Hexagon::V6_vS32b_pi:
+ NewOpc = Hexagon::V6_vS32b_ai;
+ OpNum = 0;
+ break;
+ default:
+ return false;
+ }
+ auto IsDead = [this] (NodeAddr<DefNode*> DA) -> bool {
+ return getDeadNodes().count(DA.Id);
+ };
+ NodeList Defs;
+ MachineOperand &Op = MI->getOperand(OpNum);
+ for (NodeAddr<DefNode*> DA : IA.Addr->members_if(DFG.IsDef, DFG)) {
+ if (&DA.Addr->getOp() != &Op)
+ continue;
+ Defs = DFG.getRelatedRefs(IA, DA);
+ if (!std::all_of(Defs.begin(), Defs.end(), IsDead))
+ return false;
+ break;
+ }
+
+ // Mark all nodes in Defs for removal.
+ for (auto D : Defs)
+ Remove.insert(D.Id);
+
+ if (trace())
+ dbgs() << "Rewriting: " << *MI;
+ MI->setDesc(HII.get(NewOpc));
+ MI->getOperand(OpNum+2).setImm(0);
+ removeOperand(IA, OpNum);
+ if (trace())
+ dbgs() << " to: " << *MI;
+
+ return true;
+}
+
+
+bool HexagonRDFOpt::runOnMachineFunction(MachineFunction &MF) {
+ if (RDFLimit.getPosition()) {
+ if (RDFCount >= RDFLimit)
+ return false;
+ RDFCount++;
+ }
+
+ MDT = &getAnalysis<MachineDominatorTree>();
+ const auto &MDF = getAnalysis<MachineDominanceFrontier>();
+ const auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ const auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ HexagonRegisterAliasInfo HAI(HRI);
+ TargetOperandInfo TOI(HII);
+
+ if (RDFDump)
+ MF.print(dbgs() << "Before " << getPassName() << "\n", nullptr);
+ DataFlowGraph G(MF, HII, HRI, *MDT, MDF, HAI, TOI);
+ G.build();
+ if (RDFDump) {
+ dbgs() << PrintNode<FuncNode*>(G.getFunc(), G) << '\n';
+ dbgs() << MF.getName() << '\n';
+ }
+
+ bool Changed;
+ CopyPropagation CP(G);
+ CP.trace(RDFDump);
+ Changed = CP.run();
+ if (Changed)
+ G.build();
+
+ HexagonDCE DCE(G, *MRI);
+ DCE.trace(RDFDump);
+ Changed |= DCE.run();
+
+ if (Changed) {
+ Liveness LV(*MRI, G);
+ LV.trace(RDFDump);
+ LV.computeLiveIns();
+ LV.resetLiveIns();
+ LV.resetKills();
+ }
+
+ if (RDFDump)
+ MF.print(dbgs() << "After " << getPassName() << "\n", nullptr);
+ return false;
+}
+
+
+FunctionPass *llvm::createHexagonRDFOpt() {
+ return new HexagonRDFOpt();
+}
+
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 61c0589..6e5f732 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -103,6 +103,8 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
Reserved.set(Hexagon::R30);
Reserved.set(Hexagon::R31);
Reserved.set(Hexagon::PC);
+ Reserved.set(Hexagon::GP);
+ Reserved.set(Hexagon::D14);
Reserved.set(Hexagon::D15);
Reserved.set(Hexagon::LC0);
Reserved.set(Hexagon::LC1);
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 9dccd69..34b03fb 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -26,7 +26,11 @@
using namespace llvm;
-static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops",
+
+static cl::opt<bool> EnableRDFOpt("rdf-opt", cl::Hidden, cl::ZeroOrMore,
+ cl::init(true), cl::desc("Enable RDF-based optimizations"));
+
+static cl::opt<bool> DisableHardwareLoops("disable-hexagon-hwloops",
cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
@@ -111,6 +115,7 @@ namespace llvm {
FunctionPass *createHexagonOptimizeSZextends();
FunctionPass *createHexagonPacketizer();
FunctionPass *createHexagonPeephole();
+ FunctionPass *createHexagonRDFOpt();
FunctionPass *createHexagonSplitConst32AndConst64();
FunctionPass *createHexagonSplitDoubleRegs();
FunctionPass *createHexagonStoreWidening();
@@ -262,9 +267,12 @@ void HexagonPassConfig::addPreRegAlloc() {
}
void HexagonPassConfig::addPostRegAlloc() {
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOpt::None) {
+ if (EnableRDFOpt)
+ addPass(createHexagonRDFOpt());
if (!DisableHexagonCFGOpt)
addPass(createHexagonCFGOptimizer(), false);
+ }
}
void HexagonPassConfig::addPreSched2() {
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index c2c6275..4b07ca7 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -334,21 +334,21 @@ static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI,
// The only relocs left should be GP relative:
default:
if (MCID.mayStore() || MCID.mayLoad()) {
- for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses;
- ++ImpUses) {
- if (*ImpUses == Hexagon::GP) {
- switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) {
- case HexagonII::MemAccessSize::ByteAccess:
- return fixup_Hexagon_GPREL16_0;
- case HexagonII::MemAccessSize::HalfWordAccess:
- return fixup_Hexagon_GPREL16_1;
- case HexagonII::MemAccessSize::WordAccess:
- return fixup_Hexagon_GPREL16_2;
- case HexagonII::MemAccessSize::DoubleWordAccess:
- return fixup_Hexagon_GPREL16_3;
- default:
- llvm_unreachable("unhandled fixup");
- }
+ for (const MCPhysReg *ImpUses = MCID.getImplicitUses();
+ ImpUses && *ImpUses; ++ImpUses) {
+ if (*ImpUses != Hexagon::GP)
+ continue;
+ switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) {
+ case HexagonII::MemAccessSize::ByteAccess:
+ return fixup_Hexagon_GPREL16_0;
+ case HexagonII::MemAccessSize::HalfWordAccess:
+ return fixup_Hexagon_GPREL16_1;
+ case HexagonII::MemAccessSize::WordAccess:
+ return fixup_Hexagon_GPREL16_2;
+ case HexagonII::MemAccessSize::DoubleWordAccess:
+ return fixup_Hexagon_GPREL16_3;
+ default:
+ llvm_unreachable("unhandled fixup");
}
}
} else
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index 6ceb848..4e1cce3 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -95,14 +95,7 @@ unsigned HexagonResource::setWeight(unsigned s) {
return (Weight);
}
-HexagonCVIResource::TypeUnitsAndLanes *HexagonCVIResource::TUL;
-
-bool HexagonCVIResource::SetUp = HexagonCVIResource::setup();
-
-bool HexagonCVIResource::setup() {
- assert(!TUL);
- TUL = new (TypeUnitsAndLanes);
-
+void HexagonCVIResource::SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU) {
(*TUL)[HexagonII::TypeCVI_VA] =
UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
(*TUL)[HexagonII::TypeCVI_VA_DV] = UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2);
@@ -123,13 +116,12 @@ bool HexagonCVIResource::setup() {
(*TUL)[HexagonII::TypeCVI_VM_NEW_ST] = UnitsAndLanes(CVI_NONE, 0);
(*TUL)[HexagonII::TypeCVI_VM_STU] = UnitsAndLanes(CVI_XLANE, 1);
(*TUL)[HexagonII::TypeCVI_HIST] = UnitsAndLanes(CVI_XLANE, 4);
-
- return true;
}
-HexagonCVIResource::HexagonCVIResource(MCInstrInfo const &MCII, unsigned s,
+HexagonCVIResource::HexagonCVIResource(TypeUnitsAndLanes *TUL,
+ MCInstrInfo const &MCII, unsigned s,
MCInst const *id)
- : HexagonResource(s) {
+ : HexagonResource(s), TUL(TUL) {
unsigned T = HexagonMCInstrInfo::getType(MCII, *id);
if (TUL->count(T)) {
@@ -153,6 +145,7 @@ HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII,
MCSubtargetInfo const &STI)
: MCII(MCII), STI(STI) {
reset();
+ HexagonCVIResource::SetupTUL(&TUL, STI.getCPU());
}
void HexagonShuffler::reset() {
@@ -163,7 +156,7 @@ void HexagonShuffler::reset() {
void HexagonShuffler::append(MCInst const *ID, MCInst const *Extender,
unsigned S, bool X) {
- HexagonInstr PI(MCII, ID, Extender, S, X);
+ HexagonInstr PI(&TUL, MCII, ID, Extender, S, X);
Packet.push_back(PI);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index 174f10f..a093f85 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
using namespace llvm;
@@ -53,9 +54,11 @@ public:
// HVX insn resources.
class HexagonCVIResource : public HexagonResource {
+public:
typedef std::pair<unsigned, unsigned> UnitsAndLanes;
typedef llvm::DenseMap<unsigned, UnitsAndLanes> TypeUnitsAndLanes;
+private:
// Available HVX slots.
enum {
CVI_NONE = 0,
@@ -65,9 +68,7 @@ class HexagonCVIResource : public HexagonResource {
CVI_MPY1 = 1 << 3
};
- static bool SetUp;
- static bool setup();
- static TypeUnitsAndLanes *TUL;
+ TypeUnitsAndLanes *TUL;
// Count of adjacent slots that the insn requires to be executed.
unsigned Lanes;
@@ -81,7 +82,9 @@ class HexagonCVIResource : public HexagonResource {
void setStore(bool f = true) { Store = f; };
public:
- HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, MCInst const *id);
+ HexagonCVIResource(TypeUnitsAndLanes *TUL, MCInstrInfo const &MCII,
+ unsigned s, MCInst const *id);
+ static void SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU);
bool isValid() const { return (Valid); };
unsigned getLanes() const { return (Lanes); };
@@ -100,10 +103,11 @@ class HexagonInstr {
bool SoloException;
public:
- HexagonInstr(MCInstrInfo const &MCII, MCInst const *id,
+ HexagonInstr(HexagonCVIResource::TypeUnitsAndLanes *T,
+ MCInstrInfo const &MCII, MCInst const *id,
MCInst const *Extender, unsigned s, bool x = false)
- : ID(id), Extender(Extender), Core(s), CVI(MCII, s, id),
- SoloException(x){};
+ : ID(id), Extender(Extender), Core(s), CVI(T, MCII, s, id),
+ SoloException(x) {};
MCInst const *getDesc() const { return (ID); };
@@ -136,6 +140,8 @@ class HexagonShuffler {
// Shuffling error code.
unsigned Error;
+ HexagonCVIResource::TypeUnitsAndLanes TUL;
+
protected:
int64_t BundleFlags;
MCInstrInfo const &MCII;
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp b/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp
new file mode 100644
index 0000000..c547c71
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp
@@ -0,0 +1,180 @@
+//===--- RDFCopy.cpp ------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Simplistic RDF-based copy propagation.
+
+#include "RDFCopy.h"
+#include "RDFGraph.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/CommandLine.h"
+
+#include <atomic>
+
+#ifndef NDEBUG
+static cl::opt<unsigned> CpLimit("rdf-cp-limit", cl::init(0), cl::Hidden);
+static unsigned CpCount = 0;
+#endif
+
+using namespace llvm;
+using namespace rdf;
+
+void CopyPropagation::recordCopy(NodeAddr<StmtNode*> SA, MachineInstr *MI) {
+ assert(MI->getOpcode() == TargetOpcode::COPY);
+ const MachineOperand &Op0 = MI->getOperand(0), &Op1 = MI->getOperand(1);
+ RegisterRef DstR = { Op0.getReg(), Op0.getSubReg() };
+ RegisterRef SrcR = { Op1.getReg(), Op1.getSubReg() };
+ auto FS = DefM.find(SrcR);
+ if (FS == DefM.end() || FS->second.empty())
+ return;
+ Copies.push_back(SA.Id);
+ RDefMap[SrcR][SA.Id] = FS->second.top()->Id;
+ // Insert DstR into the map.
+ RDefMap[DstR];
+}
+
+
+void CopyPropagation::updateMap(NodeAddr<InstrNode*> IA) {
+ RegisterSet RRs;
+ for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG))
+ RRs.insert(RA.Addr->getRegRef());
+ bool Common = false;
+ for (auto &R : RDefMap) {
+ if (!RRs.count(R.first))
+ continue;
+ Common = true;
+ break;
+ }
+ if (!Common)
+ return;
+
+ for (auto &R : RDefMap) {
+ if (!RRs.count(R.first))
+ continue;
+ auto F = DefM.find(R.first);
+ if (F == DefM.end() || F->second.empty())
+ continue;
+ R.second[IA.Id] = F->second.top()->Id;
+ }
+}
+
+
+bool CopyPropagation::scanBlock(MachineBasicBlock *B) {
+ bool Changed = false;
+ auto BA = DFG.getFunc().Addr->findBlock(B, DFG);
+ DFG.markBlock(BA.Id, DefM);
+
+ for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) {
+ if (DFG.IsCode<NodeAttrs::Stmt>(IA)) {
+ NodeAddr<StmtNode*> SA = IA;
+ MachineInstr *MI = SA.Addr->getCode();
+ if (MI->isCopy())
+ recordCopy(SA, MI);
+ }
+
+ updateMap(IA);
+ DFG.pushDefs(IA, DefM);
+ }
+
+ MachineDomTreeNode *N = MDT.getNode(B);
+ for (auto I : *N)
+ Changed |= scanBlock(I->getBlock());
+
+ DFG.releaseBlock(BA.Id, DefM);
+ return Changed;
+}
+
+
+bool CopyPropagation::run() {
+ scanBlock(&DFG.getMF().front());
+
+ if (trace()) {
+ dbgs() << "Copies:\n";
+ for (auto I : Copies)
+ dbgs() << *DFG.addr<StmtNode*>(I).Addr->getCode();
+ dbgs() << "\nRDef map:\n";
+ for (auto R : RDefMap) {
+ dbgs() << Print<RegisterRef>(R.first, DFG) << " -> {";
+ for (auto &M : R.second)
+ dbgs() << ' ' << Print<NodeId>(M.first, DFG) << ':'
+ << Print<NodeId>(M.second, DFG);
+ dbgs() << " }\n";
+ }
+ }
+
+ bool Changed = false;
+ NodeSet Deleted;
+#ifndef NDEBUG
+ bool HasLimit = CpLimit.getNumOccurrences() > 0;
+#endif
+
+ for (auto I : Copies) {
+#ifndef NDEBUG
+ if (HasLimit && CpCount >= CpLimit)
+ break;
+#endif
+ if (Deleted.count(I))
+ continue;
+ auto SA = DFG.addr<InstrNode*>(I);
+ NodeList Ds = SA.Addr->members_if(DFG.IsDef, DFG);
+ if (Ds.size() != 1)
+ continue;
+ NodeAddr<DefNode*> DA = Ds[0];
+ RegisterRef DR0 = DA.Addr->getRegRef();
+ NodeList Us = SA.Addr->members_if(DFG.IsUse, DFG);
+ if (Us.size() != 1)
+ continue;
+ NodeAddr<UseNode*> UA0 = Us[0];
+ RegisterRef UR0 = UA0.Addr->getRegRef();
+ NodeId RD0 = UA0.Addr->getReachingDef();
+
+ for (NodeId N = DA.Addr->getReachedUse(), NextN; N; N = NextN) {
+ auto UA = DFG.addr<UseNode*>(N);
+ NextN = UA.Addr->getSibling();
+ uint16_t F = UA.Addr->getFlags();
+ if ((F & NodeAttrs::PhiRef) || (F & NodeAttrs::Fixed))
+ continue;
+ if (UA.Addr->getRegRef() != DR0)
+ continue;
+ NodeAddr<InstrNode*> IA = UA.Addr->getOwner(DFG);
+ assert(DFG.IsCode<NodeAttrs::Stmt>(IA));
+ MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
+ if (RDefMap[UR0][IA.Id] != RD0)
+ continue;
+ MachineOperand &Op = UA.Addr->getOp();
+ if (Op.isTied())
+ continue;
+ if (trace()) {
+ dbgs() << "can replace " << Print<RegisterRef>(DR0, DFG)
+ << " with " << Print<RegisterRef>(UR0, DFG) << " in "
+ << *NodeAddr<StmtNode*>(IA).Addr->getCode();
+ }
+
+ Op.setReg(UR0.Reg);
+ Op.setSubReg(UR0.Sub);
+ Changed = true;
+#ifndef NDEBUG
+ if (HasLimit && CpCount >= CpLimit)
+ break;
+ CpCount++;
+#endif
+
+ if (MI->isCopy()) {
+ MachineOperand &Op0 = MI->getOperand(0), &Op1 = MI->getOperand(1);
+ if (Op0.getReg() == Op1.getReg() && Op0.getSubReg() == Op1.getSubReg())
+ MI->eraseFromParent();
+ Deleted.insert(IA.Id);
+ }
+ }
+ }
+
+ return Changed;
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFCopy.h b/contrib/llvm/lib/Target/Hexagon/RDFCopy.h
new file mode 100644
index 0000000..02531b9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/RDFCopy.h
@@ -0,0 +1,48 @@
+//===--- RDFCopy.h --------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef RDF_COPY_H
+#define RDF_COPY_H
+
+#include "RDFGraph.h"
+#include <map>
+#include <vector>
+
+namespace llvm {
+ class MachineBasicBlock;
+ class MachineDominatorTree;
+ class MachineInstr;
+}
+
+namespace rdf {
+ struct CopyPropagation {
+ CopyPropagation(DataFlowGraph &dfg) : MDT(dfg.getDT()), DFG(dfg),
+ Trace(false) {}
+
+ bool run();
+ void trace(bool On) { Trace = On; }
+ bool trace() const { return Trace; }
+
+ private:
+ const MachineDominatorTree &MDT;
+ DataFlowGraph &DFG;
+ DataFlowGraph::DefStackMap DefM;
+ bool Trace;
+
+ // map: register -> (map: stmt -> reaching def)
+ std::map<RegisterRef,std::map<NodeId,NodeId>> RDefMap;
+ std::vector<NodeId> Copies;
+
+ void recordCopy(NodeAddr<StmtNode*> SA, MachineInstr *MI);
+ void updateMap(NodeAddr<InstrNode*> IA);
+ bool scanBlock(MachineBasicBlock *B);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp b/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
new file mode 100644
index 0000000..9566857
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
@@ -0,0 +1,204 @@
+//===--- RDFDeadCode.cpp --------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// RDF-based generic dead code elimination.
+
+#include "RDFGraph.h"
+#include "RDFLiveness.h"
+#include "RDFDeadCode.h"
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+using namespace rdf;
+
+// Check if the given instruction has observable side-effects, i.e. if
+// it should be considered "live". It is safe for this function to be
+// overly conservative (i.e. return "true" for all instructions), but it
+// is not safe to return "false" for an instruction that should not be
+// considered removable.
+bool DeadCodeElimination::isLiveInstr(const MachineInstr *MI) const {
+ if (MI->mayStore() || MI->isBranch() || MI->isCall() || MI->isReturn())
+ return true;
+ if (MI->hasOrderedMemoryRef() || MI->hasUnmodeledSideEffects())
+ return true;
+ if (MI->isPHI())
+ return false;
+ for (auto &Op : MI->operands())
+ if (Op.isReg() && MRI.isReserved(Op.getReg()))
+ return true;
+ return false;
+}
+
+void DeadCodeElimination::scanInstr(NodeAddr<InstrNode*> IA,
+ SetVector<NodeId> &WorkQ) {
+ if (!DFG.IsCode<NodeAttrs::Stmt>(IA))
+ return;
+ if (!isLiveInstr(NodeAddr<StmtNode*>(IA).Addr->getCode()))
+ return;
+ for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG)) {
+ if (!LiveNodes.count(RA.Id))
+ WorkQ.insert(RA.Id);
+ }
+}
+
+void DeadCodeElimination::processDef(NodeAddr<DefNode*> DA,
+ SetVector<NodeId> &WorkQ) {
+ NodeAddr<InstrNode*> IA = DA.Addr->getOwner(DFG);
+ for (NodeAddr<UseNode*> UA : IA.Addr->members_if(DFG.IsUse, DFG)) {
+ if (!LiveNodes.count(UA.Id))
+ WorkQ.insert(UA.Id);
+ }
+ for (NodeAddr<DefNode*> TA : DFG.getRelatedRefs(IA, DA))
+ LiveNodes.insert(TA.Id);
+}
+
+void DeadCodeElimination::processUse(NodeAddr<UseNode*> UA,
+ SetVector<NodeId> &WorkQ) {
+ for (NodeAddr<DefNode*> DA : LV.getAllReachingDefs(UA)) {
+ if (!LiveNodes.count(DA.Id))
+ WorkQ.insert(DA.Id);
+ }
+}
+
+// Traverse the DFG and collect the set dead RefNodes and the set of
+// dead instructions. Return "true" if any of these sets is non-empty,
+// "false" otherwise.
+bool DeadCodeElimination::collect() {
+ // This function works by first finding all live nodes. The dead nodes
+ // are then the complement of the set of live nodes.
+ //
+ // Assume that all nodes are dead. Identify instructions which must be
+ // considered live, i.e. instructions with observable side-effects, such
+ // as calls and stores. All arguments of such instructions are considered
+ // live. For each live def, all operands used in the corresponding
+ // instruction are considered live. For each live use, all its reaching
+ // defs are considered live.
+ LiveNodes.clear();
+ SetVector<NodeId> WorkQ;
+ for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG))
+ for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG))
+ scanInstr(IA, WorkQ);
+
+ while (!WorkQ.empty()) {
+ NodeId N = *WorkQ.begin();
+ WorkQ.remove(N);
+ LiveNodes.insert(N);
+ auto RA = DFG.addr<RefNode*>(N);
+ if (DFG.IsDef(RA))
+ processDef(RA, WorkQ);
+ else
+ processUse(RA, WorkQ);
+ }
+
+ if (trace()) {
+ dbgs() << "Live nodes:\n";
+ for (NodeId N : LiveNodes) {
+ auto RA = DFG.addr<RefNode*>(N);
+ dbgs() << PrintNode<RefNode*>(RA, DFG) << "\n";
+ }
+ }
+
+ auto IsDead = [this] (NodeAddr<InstrNode*> IA) -> bool {
+ for (NodeAddr<DefNode*> DA : IA.Addr->members_if(DFG.IsDef, DFG))
+ if (LiveNodes.count(DA.Id))
+ return false;
+ return true;
+ };
+
+ for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) {
+ for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) {
+ for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG))
+ if (!LiveNodes.count(RA.Id))
+ DeadNodes.insert(RA.Id);
+ if (DFG.IsCode<NodeAttrs::Stmt>(IA))
+ if (isLiveInstr(NodeAddr<StmtNode*>(IA).Addr->getCode()))
+ continue;
+ if (IsDead(IA)) {
+ DeadInstrs.insert(IA.Id);
+ if (trace())
+ dbgs() << "Dead instr: " << PrintNode<InstrNode*>(IA, DFG) << "\n";
+ }
+ }
+ }
+
+ return !DeadNodes.empty();
+}
+
+// Erase the nodes given in the Nodes set from DFG. In addition to removing
+// them from the DFG, if a node corresponds to a statement, the corresponding
+// machine instruction is erased from the function.
+bool DeadCodeElimination::erase(const SetVector<NodeId> &Nodes) {
+ if (Nodes.empty())
+ return false;
+
+ // Prepare the actual set of ref nodes to remove: ref nodes from Nodes
+ // are included directly, for each InstrNode in Nodes, include the set
+ // of all RefNodes from it.
+ NodeList DRNs, DINs;
+ for (auto I : Nodes) {
+ auto BA = DFG.addr<NodeBase*>(I);
+ uint16_t Type = BA.Addr->getType();
+ if (Type == NodeAttrs::Ref) {
+ DRNs.push_back(DFG.addr<RefNode*>(I));
+ continue;
+ }
+
+ // If it's a code node, add all ref nodes from it.
+ uint16_t Kind = BA.Addr->getKind();
+ if (Kind == NodeAttrs::Stmt || Kind == NodeAttrs::Phi) {
+ for (auto N : NodeAddr<CodeNode*>(BA).Addr->members(DFG))
+ DRNs.push_back(N);
+ DINs.push_back(DFG.addr<InstrNode*>(I));
+ } else {
+ llvm_unreachable("Unexpected code node");
+ return false;
+ }
+ }
+
+ // Sort the list so that use nodes are removed first. This makes the
+ // "unlink" functions a bit faster.
+ auto UsesFirst = [] (NodeAddr<RefNode*> A, NodeAddr<RefNode*> B) -> bool {
+ uint16_t KindA = A.Addr->getKind(), KindB = B.Addr->getKind();
+ if (KindA == NodeAttrs::Use && KindB == NodeAttrs::Def)
+ return true;
+ if (KindA == NodeAttrs::Def && KindB == NodeAttrs::Use)
+ return false;
+ return A.Id < B.Id;
+ };
+ std::sort(DRNs.begin(), DRNs.end(), UsesFirst);
+
+ if (trace())
+ dbgs() << "Removing dead ref nodes:\n";
+ for (NodeAddr<RefNode*> RA : DRNs) {
+ if (trace())
+ dbgs() << " " << PrintNode<RefNode*>(RA, DFG) << '\n';
+ if (DFG.IsUse(RA))
+ DFG.unlinkUse(RA);
+ else if (DFG.IsDef(RA))
+ DFG.unlinkDef(RA);
+ }
+
+ // Now, remove all dead instruction nodes.
+ for (NodeAddr<InstrNode*> IA : DINs) {
+ NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
+ BA.Addr->removeMember(IA, DFG);
+ if (!DFG.IsCode<NodeAttrs::Stmt>(IA))
+ continue;
+
+ MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
+ if (trace())
+ dbgs() << "erasing: " << *MI;
+ MI->eraseFromParent();
+ }
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.h b/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.h
new file mode 100644
index 0000000..f4373fb
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.h
@@ -0,0 +1,65 @@
+//===--- RDFDeadCode.h ----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// RDF-based generic dead code elimination.
+//
+// The main interface of this class are functions "collect" and "erase".
+// This allows custom processing of the function being optimized by a
+// particular consumer. The simplest way to use this class would be to
+// instantiate an object, and then simply call "collect" and "erase",
+// passing the result of "getDeadInstrs()" to it.
+// A more complex scenario would be to call "collect" first, then visit
+// all post-increment instructions to see if the address update is dead
+// or not, and if it is, convert the instruction to a non-updating form.
+// After that "erase" can be called with the set of nodes including both,
+// dead defs from the updating instructions and the nodes corresponding
+// to the dead instructions.
+
+#ifndef RDF_DEADCODE_H
+#define RDF_DEADCODE_H
+
+#include "RDFGraph.h"
+#include "RDFLiveness.h"
+#include "llvm/ADT/SetVector.h"
+
+namespace llvm {
+ class MachineRegisterInfo;
+}
+
+namespace rdf {
+ struct DeadCodeElimination {
+ DeadCodeElimination(DataFlowGraph &dfg, MachineRegisterInfo &mri)
+ : Trace(false), DFG(dfg), MRI(mri), LV(mri, dfg) {}
+
+ bool collect();
+ bool erase(const SetVector<NodeId> &Nodes);
+ void trace(bool On) { Trace = On; }
+ bool trace() const { return Trace; }
+
+ SetVector<NodeId> getDeadNodes() { return DeadNodes; }
+ SetVector<NodeId> getDeadInstrs() { return DeadInstrs; }
+ DataFlowGraph &getDFG() { return DFG; }
+
+ private:
+ bool Trace;
+ SetVector<NodeId> LiveNodes;
+ SetVector<NodeId> DeadNodes;
+ SetVector<NodeId> DeadInstrs;
+ DataFlowGraph &DFG;
+ MachineRegisterInfo &MRI;
+ Liveness LV;
+
+ bool isLiveInstr(const MachineInstr *MI) const;
+ void scanInstr(NodeAddr<InstrNode*> IA, SetVector<NodeId> &WorkQ);
+ void processDef(NodeAddr<DefNode*> DA, SetVector<NodeId> &WorkQ);
+ void processUse(NodeAddr<UseNode*> UA, SetVector<NodeId> &WorkQ);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp b/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp
new file mode 100644
index 0000000..9b47422
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp
@@ -0,0 +1,1716 @@
+//===--- RDFGraph.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Target-independent, SSA-based data flow graph for register data flow (RDF).
+//
+#include "RDFGraph.h"
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominanceFrontier.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+using namespace rdf;
+
+// Printing functions. Have them here first, so that the rest of the code
+// can use them.
+namespace rdf {
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterRef> &P) {
+ auto &TRI = P.G.getTRI();
+ if (P.Obj.Reg > 0 && P.Obj.Reg < TRI.getNumRegs())
+ OS << TRI.getName(P.Obj.Reg);
+ else
+ OS << '#' << P.Obj.Reg;
+ if (P.Obj.Sub > 0) {
+ OS << ':';
+ if (P.Obj.Sub < TRI.getNumSubRegIndices())
+ OS << TRI.getSubRegIndexName(P.Obj.Sub);
+ else
+ OS << '#' << P.Obj.Sub;
+ }
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) {
+ auto NA = P.G.addr<NodeBase*>(P.Obj);
+ uint16_t Attrs = NA.Addr->getAttrs();
+ uint16_t Kind = NodeAttrs::kind(Attrs);
+ uint16_t Flags = NodeAttrs::flags(Attrs);
+ switch (NodeAttrs::type(Attrs)) {
+ case NodeAttrs::Code:
+ switch (Kind) {
+ case NodeAttrs::Func: OS << 'f'; break;
+ case NodeAttrs::Block: OS << 'b'; break;
+ case NodeAttrs::Stmt: OS << 's'; break;
+ case NodeAttrs::Phi: OS << 'p'; break;
+ default: OS << "c?"; break;
+ }
+ break;
+ case NodeAttrs::Ref:
+ if (Flags & NodeAttrs::Preserving)
+ OS << '+';
+ if (Flags & NodeAttrs::Clobbering)
+ OS << '~';
+ switch (Kind) {
+ case NodeAttrs::Use: OS << 'u'; break;
+ case NodeAttrs::Def: OS << 'd'; break;
+ case NodeAttrs::Block: OS << 'b'; break;
+ default: OS << "r?"; break;
+ }
+ break;
+ default:
+ OS << '?';
+ break;
+ }
+ OS << P.Obj;
+ if (Flags & NodeAttrs::Shadow)
+ OS << '"';
+ return OS;
+}
+
+namespace {
+ void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA,
+ const DataFlowGraph &G) {
+ OS << Print<NodeId>(RA.Id, G) << '<'
+ << Print<RegisterRef>(RA.Addr->getRegRef(), G) << '>';
+ if (RA.Addr->getFlags() & NodeAttrs::Fixed)
+ OS << '!';
+ }
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<DefNode*>> &P) {
+ printRefHeader(OS, P.Obj, P.G);
+ OS << '(';
+ if (NodeId N = P.Obj.Addr->getReachingDef())
+ OS << Print<NodeId>(N, P.G);
+ OS << ',';
+ if (NodeId N = P.Obj.Addr->getReachedDef())
+ OS << Print<NodeId>(N, P.G);
+ OS << ',';
+ if (NodeId N = P.Obj.Addr->getReachedUse())
+ OS << Print<NodeId>(N, P.G);
+ OS << "):";
+ if (NodeId N = P.Obj.Addr->getSibling())
+ OS << Print<NodeId>(N, P.G);
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<UseNode*>> &P) {
+ printRefHeader(OS, P.Obj, P.G);
+ OS << '(';
+ if (NodeId N = P.Obj.Addr->getReachingDef())
+ OS << Print<NodeId>(N, P.G);
+ OS << "):";
+ if (NodeId N = P.Obj.Addr->getSibling())
+ OS << Print<NodeId>(N, P.G);
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS,
+ const Print<NodeAddr<PhiUseNode*>> &P) {
+ printRefHeader(OS, P.Obj, P.G);
+ OS << '(';
+ if (NodeId N = P.Obj.Addr->getReachingDef())
+ OS << Print<NodeId>(N, P.G);
+ OS << ',';
+ if (NodeId N = P.Obj.Addr->getPredecessor())
+ OS << Print<NodeId>(N, P.G);
+ OS << "):";
+ if (NodeId N = P.Obj.Addr->getSibling())
+ OS << Print<NodeId>(N, P.G);
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<RefNode*>> &P) {
+ switch (P.Obj.Addr->getKind()) {
+ case NodeAttrs::Def:
+ OS << PrintNode<DefNode*>(P.Obj, P.G);
+ break;
+ case NodeAttrs::Use:
+ if (P.Obj.Addr->getFlags() & NodeAttrs::PhiRef)
+ OS << PrintNode<PhiUseNode*>(P.Obj, P.G);
+ else
+ OS << PrintNode<UseNode*>(P.Obj, P.G);
+ break;
+ }
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS, const Print<NodeList> &P) {
+ unsigned N = P.Obj.size();
+ for (auto I : P.Obj) {
+ OS << Print<NodeId>(I.Id, P.G);
+ if (--N)
+ OS << ' ';
+ }
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS, const Print<NodeSet> &P) {
+ unsigned N = P.Obj.size();
+ for (auto I : P.Obj) {
+ OS << Print<NodeId>(I, P.G);
+ if (--N)
+ OS << ' ';
+ }
+ return OS;
+}
+
+namespace {
+ template <typename T>
+ struct PrintListV {
+ PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {}
+ typedef T Type;
+ const NodeList &List;
+ const DataFlowGraph &G;
+ };
+
+ template <typename T>
+ raw_ostream &operator<< (raw_ostream &OS, const PrintListV<T> &P) {
+ unsigned N = P.List.size();
+ for (NodeAddr<T> A : P.List) {
+ OS << PrintNode<T>(A, P.G);
+ if (--N)
+ OS << ", ";
+ }
+ return OS;
+ }
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) {
+ OS << Print<NodeId>(P.Obj.Id, P.G) << ": phi ["
+ << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']';
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS,
+ const Print<NodeAddr<StmtNode*>> &P) {
+ unsigned Opc = P.Obj.Addr->getCode()->getOpcode();
+ OS << Print<NodeId>(P.Obj.Id, P.G) << ": " << P.G.getTII().getName(Opc)
+ << " [" << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']';
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS,
+ const Print<NodeAddr<InstrNode*>> &P) {
+ switch (P.Obj.Addr->getKind()) {
+ case NodeAttrs::Phi:
+ OS << PrintNode<PhiNode*>(P.Obj, P.G);
+ break;
+ case NodeAttrs::Stmt:
+ OS << PrintNode<StmtNode*>(P.Obj, P.G);
+ break;
+ default:
+ OS << "instr? " << Print<NodeId>(P.Obj.Id, P.G);
+ break;
+ }
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS,
+ const Print<NodeAddr<BlockNode*>> &P) {
+ auto *BB = P.Obj.Addr->getCode();
+ unsigned NP = BB->pred_size();
+ std::vector<int> Ns;
+ auto PrintBBs = [&OS,&P] (std::vector<int> Ns) -> void {
+ unsigned N = Ns.size();
+ for (auto I : Ns) {
+ OS << "BB#" << I;
+ if (--N)
+ OS << ", ";
+ }
+ };
+
+ OS << Print<NodeId>(P.Obj.Id, P.G) << ": === BB#" << BB->getNumber()
+ << " === preds(" << NP << "): ";
+ for (auto I : BB->predecessors())
+ Ns.push_back(I->getNumber());
+ PrintBBs(Ns);
+
+ unsigned NS = BB->succ_size();
+ OS << " succs(" << NS << "): ";
+ Ns.clear();
+ for (auto I : BB->successors())
+ Ns.push_back(I->getNumber());
+ PrintBBs(Ns);
+ OS << '\n';
+
+ for (auto I : P.Obj.Addr->members(P.G))
+ OS << PrintNode<InstrNode*>(I, P.G) << '\n';
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS,
+ const Print<NodeAddr<FuncNode*>> &P) {
+ OS << "DFG dump:[\n" << Print<NodeId>(P.Obj.Id, P.G) << ": Function: "
+ << P.Obj.Addr->getCode()->getName() << '\n';
+ for (auto I : P.Obj.Addr->members(P.G))
+ OS << PrintNode<BlockNode*>(I, P.G) << '\n';
+ OS << "]\n";
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterSet> &P) {
+ OS << '{';
+ for (auto I : P.Obj)
+ OS << ' ' << Print<RegisterRef>(I, P.G);
+ OS << " }";
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS,
+ const Print<DataFlowGraph::DefStack> &P) {
+ for (auto I = P.Obj.top(), E = P.Obj.bottom(); I != E; ) {
+ OS << Print<NodeId>(I->Id, P.G)
+ << '<' << Print<RegisterRef>(I->Addr->getRegRef(), P.G) << '>';
+ I.down();
+ if (I != E)
+ OS << ' ';
+ }
+ return OS;
+}
+
+} // namespace rdf
+
+// Node allocation functions.
+//
+// Node allocator is like a slab memory allocator: it allocates blocks of
+// memory in sizes that are multiples of the size of a node. Each block has
+// the same size. Nodes are allocated from the currently active block, and
+// when it becomes full, a new one is created.
+// There is a mapping scheme between node id and its location in a block,
+// and within that block is described in the header file.
+//
+void NodeAllocator::startNewBlock() {
+ void *T = MemPool.Allocate(NodesPerBlock*NodeMemSize, NodeMemSize);
+ char *P = static_cast<char*>(T);
+ Blocks.push_back(P);
+ // Check if the block index is still within the allowed range, i.e. less
+ // than 2^N, where N is the number of bits in NodeId for the block index.
+ // BitsPerIndex is the number of bits per node index.
+ assert((Blocks.size() < (1U << (8*sizeof(NodeId)-BitsPerIndex))) &&
+ "Out of bits for block index");
+ ActiveEnd = P;
+}
+
+bool NodeAllocator::needNewBlock() {
+ if (Blocks.empty())
+ return true;
+
+ char *ActiveBegin = Blocks.back();
+ uint32_t Index = (ActiveEnd-ActiveBegin)/NodeMemSize;
+ return Index >= NodesPerBlock;
+}
+
+NodeAddr<NodeBase*> NodeAllocator::New() {
+ if (needNewBlock())
+ startNewBlock();
+
+ uint32_t ActiveB = Blocks.size()-1;
+ uint32_t Index = (ActiveEnd - Blocks[ActiveB])/NodeMemSize;
+ NodeAddr<NodeBase*> NA = { reinterpret_cast<NodeBase*>(ActiveEnd),
+ makeId(ActiveB, Index) };
+ ActiveEnd += NodeMemSize;
+ return NA;
+}
+
+NodeId NodeAllocator::id(const NodeBase *P) const {
+ uintptr_t A = reinterpret_cast<uintptr_t>(P);
+ for (unsigned i = 0, n = Blocks.size(); i != n; ++i) {
+ uintptr_t B = reinterpret_cast<uintptr_t>(Blocks[i]);
+ if (A < B || A >= B + NodesPerBlock*NodeMemSize)
+ continue;
+ uint32_t Idx = (A-B)/NodeMemSize;
+ return makeId(i, Idx);
+ }
+ llvm_unreachable("Invalid node address");
+}
+
+void NodeAllocator::clear() {
+ MemPool.Reset();
+ Blocks.clear();
+ ActiveEnd = nullptr;
+}
+
+
+// Insert node NA after "this" in the circular chain.
+void NodeBase::append(NodeAddr<NodeBase*> NA) {
+ NodeId Nx = Next;
+ // If NA is already "next", do nothing.
+ if (Next != NA.Id) {
+ Next = NA.Id;
+ NA.Addr->Next = Nx;
+ }
+}
+
+
+// Fundamental node manipulator functions.
+
+// Obtain the register reference from a reference node.
+RegisterRef RefNode::getRegRef() const {
+ assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
+ if (NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef)
+ return Ref.RR;
+ assert(Ref.Op != nullptr);
+ return { Ref.Op->getReg(), Ref.Op->getSubReg() };
+}
+
+// Set the register reference in the reference node directly (for references
+// in phi nodes).
+void RefNode::setRegRef(RegisterRef RR) {
+ assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
+ assert(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef);
+ Ref.RR = RR;
+}
+
+// Set the register reference in the reference node based on a machine
+// operand (for references in statement nodes).
+void RefNode::setRegRef(MachineOperand *Op) {
+ assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
+ assert(!(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef));
+ Ref.Op = Op;
+}
+
+// Get the owner of a given reference node.
+NodeAddr<NodeBase*> RefNode::getOwner(const DataFlowGraph &G) {
+ NodeAddr<NodeBase*> NA = G.addr<NodeBase*>(getNext());
+
+ while (NA.Addr != this) {
+ if (NA.Addr->getType() == NodeAttrs::Code)
+ return NA;
+ NA = G.addr<NodeBase*>(NA.Addr->getNext());
+ }
+ llvm_unreachable("No owner in circular list");
+}
+
+// Connect the def node to the reaching def node.
+void DefNode::linkToDef(NodeId Self, NodeAddr<DefNode*> DA) {
+ Ref.RD = DA.Id;
+ Ref.Sib = DA.Addr->getReachedDef();
+ DA.Addr->setReachedDef(Self);
+}
+
+// Connect the use node to the reaching def node.
+void UseNode::linkToDef(NodeId Self, NodeAddr<DefNode*> DA) {
+ Ref.RD = DA.Id;
+ Ref.Sib = DA.Addr->getReachedUse();
+ DA.Addr->setReachedUse(Self);
+}
+
+// Get the first member of the code node.
+NodeAddr<NodeBase*> CodeNode::getFirstMember(const DataFlowGraph &G) const {
+ if (Code.FirstM == 0)
+ return NodeAddr<NodeBase*>();
+ return G.addr<NodeBase*>(Code.FirstM);
+}
+
+// Get the last member of the code node.
+NodeAddr<NodeBase*> CodeNode::getLastMember(const DataFlowGraph &G) const {
+ if (Code.LastM == 0)
+ return NodeAddr<NodeBase*>();
+ return G.addr<NodeBase*>(Code.LastM);
+}
+
+// Add node NA at the end of the member list of the given code node.
+void CodeNode::addMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) {
+ auto ML = getLastMember(G);
+ if (ML.Id != 0) {
+ ML.Addr->append(NA);
+ } else {
+ Code.FirstM = NA.Id;
+ NodeId Self = G.id(this);
+ NA.Addr->setNext(Self);
+ }
+ Code.LastM = NA.Id;
+}
+
+// Add node NA after member node MA in the given code node.
+void CodeNode::addMemberAfter(NodeAddr<NodeBase*> MA, NodeAddr<NodeBase*> NA,
+ const DataFlowGraph &G) {
+ MA.Addr->append(NA);
+ if (Code.LastM == MA.Id)
+ Code.LastM = NA.Id;
+}
+
+// Remove member node NA from the given code node.
+void CodeNode::removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) {
+ auto MA = getFirstMember(G);
+ assert(MA.Id != 0);
+
+ // Special handling if the member to remove is the first member.
+ if (MA.Id == NA.Id) {
+ if (Code.LastM == MA.Id) {
+ // If it is the only member, set both first and last to 0.
+ Code.FirstM = Code.LastM = 0;
+ } else {
+ // Otherwise, advance the first member.
+ Code.FirstM = MA.Addr->getNext();
+ }
+ return;
+ }
+
+ while (MA.Addr != this) {
+ NodeId MX = MA.Addr->getNext();
+ if (MX == NA.Id) {
+ MA.Addr->setNext(NA.Addr->getNext());
+ // If the member to remove happens to be the last one, update the
+ // LastM indicator.
+ if (Code.LastM == NA.Id)
+ Code.LastM = MA.Id;
+ return;
+ }
+ MA = G.addr<NodeBase*>(MX);
+ }
+ llvm_unreachable("No such member");
+}
+
+// Return the list of all members of the code node.
+NodeList CodeNode::members(const DataFlowGraph &G) const {
+ static auto True = [] (NodeAddr<NodeBase*>) -> bool { return true; };
+ return members_if(True, G);
+}
+
+// Return the owner of the given instr node.
+NodeAddr<NodeBase*> InstrNode::getOwner(const DataFlowGraph &G) {
+ NodeAddr<NodeBase*> NA = G.addr<NodeBase*>(getNext());
+
+ while (NA.Addr != this) {
+ assert(NA.Addr->getType() == NodeAttrs::Code);
+ if (NA.Addr->getKind() == NodeAttrs::Block)
+ return NA;
+ NA = G.addr<NodeBase*>(NA.Addr->getNext());
+ }
+ llvm_unreachable("No owner in circular list");
+}
+
+// Add the phi node PA to the given block node.
+void BlockNode::addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G) {
+ auto M = getFirstMember(G);
+ if (M.Id == 0) {
+ addMember(PA, G);
+ return;
+ }
+
+ assert(M.Addr->getType() == NodeAttrs::Code);
+ if (M.Addr->getKind() == NodeAttrs::Stmt) {
+ // If the first member of the block is a statement, insert the phi as
+ // the first member.
+ Code.FirstM = PA.Id;
+ PA.Addr->setNext(M.Id);
+ } else {
+ // If the first member is a phi, find the last phi, and append PA to it.
+ assert(M.Addr->getKind() == NodeAttrs::Phi);
+ NodeAddr<NodeBase*> MN = M;
+ do {
+ M = MN;
+ MN = G.addr<NodeBase*>(M.Addr->getNext());
+ assert(MN.Addr->getType() == NodeAttrs::Code);
+ } while (MN.Addr->getKind() == NodeAttrs::Phi);
+
+ // M is the last phi.
+ addMemberAfter(M, PA, G);
+ }
+}
+
+// Find the block node corresponding to the machine basic block BB in the
+// given func node.
+NodeAddr<BlockNode*> FuncNode::findBlock(const MachineBasicBlock *BB,
+ const DataFlowGraph &G) const {
+ auto EqBB = [BB] (NodeAddr<NodeBase*> NA) -> bool {
+ return NodeAddr<BlockNode*>(NA).Addr->getCode() == BB;
+ };
+ NodeList Ms = members_if(EqBB, G);
+ if (!Ms.empty())
+ return Ms[0];
+ return NodeAddr<BlockNode*>();
+}
+
+// Get the block node for the entry block in the given function.
+NodeAddr<BlockNode*> FuncNode::getEntryBlock(const DataFlowGraph &G) {
+ MachineBasicBlock *EntryB = &getCode()->front();
+ return findBlock(EntryB, G);
+}
+
+
+// Register aliasing information.
+//
+// In theory, the lane information could be used to determine register
+// covering (and aliasing), but depending on the sub-register structure,
+// the lane mask information may be missing. The covering information
+// must be available for this framework to work, so relying solely on
+// the lane data is not sufficient.
+
+// Determine whether RA covers RB.
+bool RegisterAliasInfo::covers(RegisterRef RA, RegisterRef RB) const {
+ if (RA == RB)
+ return true;
+ if (TargetRegisterInfo::isVirtualRegister(RA.Reg)) {
+ assert(TargetRegisterInfo::isVirtualRegister(RB.Reg));
+ if (RA.Reg != RB.Reg)
+ return false;
+ if (RA.Sub == 0)
+ return true;
+ return TRI.composeSubRegIndices(RA.Sub, RB.Sub) == RA.Sub;
+ }
+
+ assert(TargetRegisterInfo::isPhysicalRegister(RA.Reg) &&
+ TargetRegisterInfo::isPhysicalRegister(RB.Reg));
+ unsigned A = RA.Sub != 0 ? TRI.getSubReg(RA.Reg, RA.Sub) : RA.Reg;
+ unsigned B = RB.Sub != 0 ? TRI.getSubReg(RB.Reg, RB.Sub) : RB.Reg;
+ return TRI.isSubRegister(A, B);
+}
+
+// Determine whether RR is covered by the set of references RRs.
+bool RegisterAliasInfo::covers(const RegisterSet &RRs, RegisterRef RR) const {
+ if (RRs.count(RR))
+ return true;
+
+ // For virtual registers, we cannot accurately determine covering based
+ // on subregisters. If RR itself is not present in RRs, but it has a sub-
+ // register reference, check for the super-register alone. Otherwise,
+ // assume non-covering.
+ if (TargetRegisterInfo::isVirtualRegister(RR.Reg)) {
+ if (RR.Sub != 0)
+ return RRs.count({RR.Reg, 0});
+ return false;
+ }
+
+ // If any super-register of RR is present, then RR is covered.
+ unsigned Reg = RR.Sub == 0 ? RR.Reg : TRI.getSubReg(RR.Reg, RR.Sub);
+ for (MCSuperRegIterator SR(Reg, &TRI); SR.isValid(); ++SR)
+ if (RRs.count({*SR, 0}))
+ return true;
+
+ return false;
+}
+
+// Get the list of references aliased to RR.
+std::vector<RegisterRef> RegisterAliasInfo::getAliasSet(RegisterRef RR) const {
+ // Do not include RR in the alias set. For virtual registers return an
+ // empty set.
+ std::vector<RegisterRef> AS;
+ if (TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return AS;
+ assert(TargetRegisterInfo::isPhysicalRegister(RR.Reg));
+ unsigned R = RR.Reg;
+ if (RR.Sub)
+ R = TRI.getSubReg(RR.Reg, RR.Sub);
+
+ for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI)
+ AS.push_back(RegisterRef({*AI, 0}));
+ return AS;
+}
+
+// Check whether RA and RB are aliased.
+bool RegisterAliasInfo::alias(RegisterRef RA, RegisterRef RB) const {
+ bool VirtA = TargetRegisterInfo::isVirtualRegister(RA.Reg);
+ bool VirtB = TargetRegisterInfo::isVirtualRegister(RB.Reg);
+ bool PhysA = TargetRegisterInfo::isPhysicalRegister(RA.Reg);
+ bool PhysB = TargetRegisterInfo::isPhysicalRegister(RB.Reg);
+
+ if (VirtA != VirtB)
+ return false;
+
+ if (VirtA) {
+ if (RA.Reg != RB.Reg)
+ return false;
+ // RA and RB refer to the same register. If any of them refer to the
+ // whole register, they must be aliased.
+ if (RA.Sub == 0 || RB.Sub == 0)
+ return true;
+ unsigned SA = TRI.getSubRegIdxSize(RA.Sub);
+ unsigned OA = TRI.getSubRegIdxOffset(RA.Sub);
+ unsigned SB = TRI.getSubRegIdxSize(RB.Sub);
+ unsigned OB = TRI.getSubRegIdxOffset(RB.Sub);
+ if (OA <= OB && OA+SA > OB)
+ return true;
+ if (OB <= OA && OB+SB > OA)
+ return true;
+ return false;
+ }
+
+ assert(PhysA && PhysB);
+ (void)PhysA, (void)PhysB;
+ unsigned A = RA.Sub ? TRI.getSubReg(RA.Reg, RA.Sub) : RA.Reg;
+ unsigned B = RB.Sub ? TRI.getSubReg(RB.Reg, RB.Sub) : RB.Reg;
+ for (MCRegAliasIterator I(A, &TRI, true); I.isValid(); ++I)
+ if (B == *I)
+ return true;
+ return false;
+}
+
+
+// Target operand information.
+//
+
+// For a given instruction, check if there are any bits of RR that can remain
+// unchanged across this def.
+bool TargetOperandInfo::isPreserving(const MachineInstr &In, unsigned OpNum)
+ const {
+ return TII.isPredicated(&In);
+}
+
+// Check if the definition of RR produces an unspecified value.
+bool TargetOperandInfo::isClobbering(const MachineInstr &In, unsigned OpNum)
+ const {
+ if (In.isCall())
+ if (In.getOperand(OpNum).isImplicit())
+ return true;
+ return false;
+}
+
+// Check if the given instruction specifically requires
+bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
+ const {
+ if (In.isCall() || In.isReturn())
+ return true;
+ const MCInstrDesc &D = In.getDesc();
+ if (!D.getImplicitDefs() && !D.getImplicitUses())
+ return false;
+ const MachineOperand &Op = In.getOperand(OpNum);
+ // If there is a sub-register, treat the operand as non-fixed. Currently,
+ // fixed registers are those that are listed in the descriptor as implicit
+ // uses or defs, and those lists do not allow sub-registers.
+ if (Op.getSubReg() != 0)
+ return false;
+ unsigned Reg = Op.getReg();
+ const MCPhysReg *ImpR = Op.isDef() ? D.getImplicitDefs()
+ : D.getImplicitUses();
+ if (!ImpR)
+ return false;
+ while (*ImpR)
+ if (*ImpR++ == Reg)
+ return true;
+ return false;
+}
+
+
+//
+// The data flow graph construction.
+//
+
+DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
+ const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
+ const MachineDominanceFrontier &mdf, const RegisterAliasInfo &rai,
+ const TargetOperandInfo &toi)
+ : TimeG("rdf"), MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), RAI(rai),
+ TOI(toi) {
+}
+
+
+// The implementation of the definition stack.
+// Each register reference has its own definition stack. In particular,
+// for a register references "Reg" and "Reg:subreg" will each have their
+// own definition stacks.
+
+// Construct a stack iterator.
+DataFlowGraph::DefStack::Iterator::Iterator(const DataFlowGraph::DefStack &S,
+ bool Top) : DS(S) {
+ if (!Top) {
+ // Initialize to bottom.
+ Pos = 0;
+ return;
+ }
+ // Initialize to the top, i.e. top-most non-delimiter (or 0, if empty).
+ Pos = DS.Stack.size();
+ while (Pos > 0 && DS.isDelimiter(DS.Stack[Pos-1]))
+ Pos--;
+}
+
+// Return the size of the stack, including block delimiters.
+unsigned DataFlowGraph::DefStack::size() const {
+ unsigned S = 0;
+ for (auto I = top(), E = bottom(); I != E; I.down())
+ S++;
+ return S;
+}
+
+// Remove the top entry from the stack. Remove all intervening delimiters
+// so that after this, the stack is either empty, or the top of the stack
+// is a non-delimiter.
+void DataFlowGraph::DefStack::pop() {
+ assert(!empty());
+ unsigned P = nextDown(Stack.size());
+ Stack.resize(P);
+}
+
+// Push a delimiter for block node N on the stack.
+void DataFlowGraph::DefStack::start_block(NodeId N) {
+ assert(N != 0);
+ Stack.push_back(NodeAddr<DefNode*>(nullptr, N));
+}
+
+// Remove all nodes from the top of the stack, until the delimited for
+// block node N is encountered. Remove the delimiter as well. In effect,
+// this will remove from the stack all definitions from block N.
+void DataFlowGraph::DefStack::clear_block(NodeId N) {
+ assert(N != 0);
+ unsigned P = Stack.size();
+ while (P > 0) {
+ bool Found = isDelimiter(Stack[P-1], N);
+ P--;
+ if (Found)
+ break;
+ }
+ // This will also remove the delimiter, if found.
+ Stack.resize(P);
+}
+
+// Move the stack iterator up by one.
+unsigned DataFlowGraph::DefStack::nextUp(unsigned P) const {
+ // Get the next valid position after P (skipping all delimiters).
+ // The input position P does not have to point to a non-delimiter.
+ unsigned SS = Stack.size();
+ bool IsDelim;
+ assert(P < SS);
+ do {
+ P++;
+ IsDelim = isDelimiter(Stack[P-1]);
+ } while (P < SS && IsDelim);
+ assert(!IsDelim);
+ return P;
+}
+
+// Move the stack iterator down by one.
+unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const {
+ // Get the preceding valid position before P (skipping all delimiters).
+ // The input position P does not have to point to a non-delimiter.
+ assert(P > 0 && P <= Stack.size());
+ bool IsDelim = isDelimiter(Stack[P-1]);
+ do {
+ if (--P == 0)
+ break;
+ IsDelim = isDelimiter(Stack[P-1]);
+ } while (P > 0 && IsDelim);
+ assert(!IsDelim);
+ return P;
+}
+
+// Node management functions.
+
+// Get the pointer to the node with the id N.
+NodeBase *DataFlowGraph::ptr(NodeId N) const {
+ if (N == 0)
+ return nullptr;
+ return Memory.ptr(N);
+}
+
+// Get the id of the node at the address P.
+NodeId DataFlowGraph::id(const NodeBase *P) const {
+ if (P == nullptr)
+ return 0;
+ return Memory.id(P);
+}
+
+// Allocate a new node and set the attributes to Attrs.
+NodeAddr<NodeBase*> DataFlowGraph::newNode(uint16_t Attrs) {
+ NodeAddr<NodeBase*> P = Memory.New();
+ P.Addr->init();
+ P.Addr->setAttrs(Attrs);
+ return P;
+}
+
+// Make a copy of the given node B, except for the data-flow links, which
+// are set to 0.
+NodeAddr<NodeBase*> DataFlowGraph::cloneNode(const NodeAddr<NodeBase*> B) {
+ NodeAddr<NodeBase*> NA = newNode(0);
+ memcpy(NA.Addr, B.Addr, sizeof(NodeBase));
+ // Ref nodes need to have the data-flow links reset.
+ if (NA.Addr->getType() == NodeAttrs::Ref) {
+ NodeAddr<RefNode*> RA = NA;
+ RA.Addr->setReachingDef(0);
+ RA.Addr->setSibling(0);
+ if (NA.Addr->getKind() == NodeAttrs::Def) {
+ NodeAddr<DefNode*> DA = NA;
+ DA.Addr->setReachedDef(0);
+ DA.Addr->setReachedUse(0);
+ }
+ }
+ return NA;
+}
+
+
+// Allocation routines for specific node types/kinds.
+
+NodeAddr<UseNode*> DataFlowGraph::newUse(NodeAddr<InstrNode*> Owner,
+ MachineOperand &Op, uint16_t Flags) {
+ NodeAddr<UseNode*> UA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags);
+ UA.Addr->setRegRef(&Op);
+ return UA;
+}
+
+NodeAddr<PhiUseNode*> DataFlowGraph::newPhiUse(NodeAddr<PhiNode*> Owner,
+ RegisterRef RR, NodeAddr<BlockNode*> PredB, uint16_t Flags) {
+ NodeAddr<PhiUseNode*> PUA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags);
+ assert(Flags & NodeAttrs::PhiRef);
+ PUA.Addr->setRegRef(RR);
+ PUA.Addr->setPredecessor(PredB.Id);
+ return PUA;
+}
+
+NodeAddr<DefNode*> DataFlowGraph::newDef(NodeAddr<InstrNode*> Owner,
+ MachineOperand &Op, uint16_t Flags) {
+ NodeAddr<DefNode*> DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags);
+ DA.Addr->setRegRef(&Op);
+ return DA;
+}
+
+NodeAddr<DefNode*> DataFlowGraph::newDef(NodeAddr<InstrNode*> Owner,
+ RegisterRef RR, uint16_t Flags) {
+ NodeAddr<DefNode*> DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags);
+ assert(Flags & NodeAttrs::PhiRef);
+ DA.Addr->setRegRef(RR);
+ return DA;
+}
+
+NodeAddr<PhiNode*> DataFlowGraph::newPhi(NodeAddr<BlockNode*> Owner) {
+ NodeAddr<PhiNode*> PA = newNode(NodeAttrs::Code | NodeAttrs::Phi);
+ Owner.Addr->addPhi(PA, *this);
+ return PA;
+}
+
+NodeAddr<StmtNode*> DataFlowGraph::newStmt(NodeAddr<BlockNode*> Owner,
+ MachineInstr *MI) {
+ NodeAddr<StmtNode*> SA = newNode(NodeAttrs::Code | NodeAttrs::Stmt);
+ SA.Addr->setCode(MI);
+ Owner.Addr->addMember(SA, *this);
+ return SA;
+}
+
+NodeAddr<BlockNode*> DataFlowGraph::newBlock(NodeAddr<FuncNode*> Owner,
+ MachineBasicBlock *BB) {
+ NodeAddr<BlockNode*> BA = newNode(NodeAttrs::Code | NodeAttrs::Block);
+ BA.Addr->setCode(BB);
+ Owner.Addr->addMember(BA, *this);
+ return BA;
+}
+
+NodeAddr<FuncNode*> DataFlowGraph::newFunc(MachineFunction *MF) {
+ NodeAddr<FuncNode*> FA = newNode(NodeAttrs::Code | NodeAttrs::Func);
+ FA.Addr->setCode(MF);
+ return FA;
+}
+
+// Build the data flow graph.
+void DataFlowGraph::build() {
+ reset();
+ Func = newFunc(&MF);
+
+ if (MF.empty())
+ return;
+
+ for (auto &B : MF) {
+ auto BA = newBlock(Func, &B);
+ for (auto &I : B) {
+ if (I.isDebugValue())
+ continue;
+ buildStmt(BA, I);
+ }
+ }
+
+ // Collect information about block references.
+ NodeAddr<BlockNode*> EA = Func.Addr->getEntryBlock(*this);
+ BlockRefsMap RefM;
+ buildBlockRefs(EA, RefM);
+
+ // Add function-entry phi nodes.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (auto I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) {
+ NodeAddr<PhiNode*> PA = newPhi(EA);
+ RegisterRef RR = { I->first, 0 };
+ uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
+ NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
+ PA.Addr->addMember(DA, *this);
+ }
+
+ // Build a map "PhiM" which will contain, for each block, the set
+ // of references that will require phi definitions in that block.
+ BlockRefsMap PhiM;
+ auto Blocks = Func.Addr->members(*this);
+ for (NodeAddr<BlockNode*> BA : Blocks)
+ recordDefsForDF(PhiM, RefM, BA);
+ for (NodeAddr<BlockNode*> BA : Blocks)
+ buildPhis(PhiM, RefM, BA);
+
+ // Link all the refs. This will recursively traverse the dominator tree.
+ DefStackMap DM;
+ linkBlockRefs(DM, EA);
+
+ // Finally, remove all unused phi nodes.
+ removeUnusedPhis();
+}
+
+// For each stack in the map DefM, push the delimiter for block B on it.
+void DataFlowGraph::markBlock(NodeId B, DefStackMap &DefM) {
+ // Push block delimiters.
+ for (auto I = DefM.begin(), E = DefM.end(); I != E; ++I)
+ I->second.start_block(B);
+}
+
+// Remove all definitions coming from block B from each stack in DefM.
+void DataFlowGraph::releaseBlock(NodeId B, DefStackMap &DefM) {
+ // Pop all defs from this block from the definition stack. Defs that were
+ // added to the map during the traversal of instructions will not have a
+ // delimiter, but for those, the whole stack will be emptied.
+ for (auto I = DefM.begin(), E = DefM.end(); I != E; ++I)
+ I->second.clear_block(B);
+
+ // Finally, remove empty stacks from the map.
+ for (auto I = DefM.begin(), E = DefM.end(), NextI = I; I != E; I = NextI) {
+ NextI = std::next(I);
+ // This preserves the validity of iterators other than I.
+ if (I->second.empty())
+ DefM.erase(I);
+ }
+}
+
+// Push all definitions from the instruction node IA to an appropriate
+// stack in DefM.
+void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
+ NodeList Defs = IA.Addr->members_if(IsDef, *this);
+ NodeSet Visited;
+#ifndef NDEBUG
+ RegisterSet Defined;
+#endif
+
+ // The important objectives of this function are:
+ // - to be able to handle instructions both while the graph is being
+ // constructed, and after the graph has been constructed, and
+ // - maintain proper ordering of definitions on the stack for each
+ // register reference:
+ // - if there are two or more related defs in IA (i.e. coming from
+ // the same machine operand), then only push one def on the stack,
+ // - if there are multiple unrelated defs of non-overlapping
+ // subregisters of S, then the stack for S will have both (in an
+ // unspecified order), but the order does not matter from the data-
+ // -flow perspective.
+
+ for (NodeAddr<DefNode*> DA : Defs) {
+ if (Visited.count(DA.Id))
+ continue;
+ NodeList Rel = getRelatedRefs(IA, DA);
+ NodeAddr<DefNode*> PDA = Rel.front();
+ // Push the definition on the stack for the register and all aliases.
+ RegisterRef RR = PDA.Addr->getRegRef();
+#ifndef NDEBUG
+ // Assert if the register is defined in two or more unrelated defs.
+ // This could happen if there are two or more def operands defining it.
+ if (!Defined.insert(RR).second) {
+ auto *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
+ dbgs() << "Multiple definitions of register: "
+ << Print<RegisterRef>(RR, *this) << " in\n " << *MI
+ << "in BB#" << MI->getParent()->getNumber() << '\n';
+ llvm_unreachable(nullptr);
+ }
+#endif
+ DefM[RR].push(DA);
+ for (auto A : RAI.getAliasSet(RR)) {
+ assert(A != RR);
+ DefM[A].push(DA);
+ }
+ // Mark all the related defs as visited.
+ for (auto T : Rel)
+ Visited.insert(T.Id);
+ }
+}
+
+// Return the list of all reference nodes related to RA, including RA itself.
+// See "getNextRelated" for the meaning of a "related reference".
+NodeList DataFlowGraph::getRelatedRefs(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA) const {
+ assert(IA.Id != 0 && RA.Id != 0);
+
+ NodeList Refs;
+ NodeId Start = RA.Id;
+ do {
+ Refs.push_back(RA);
+ RA = getNextRelated(IA, RA);
+ } while (RA.Id != 0 && RA.Id != Start);
+ return Refs;
+}
+
+
+// Clear all information in the graph.
+void DataFlowGraph::reset() {
+ Memory.clear();
+ Func = NodeAddr<FuncNode*>();
+}
+
+
+// Return the next reference node in the instruction node IA that is related
+// to RA. Conceptually, two reference nodes are related if they refer to the
+// same instance of a register access, but differ in flags or other minor
+// characteristics. Specific examples of related nodes are shadow reference
+// nodes.
+// Return the equivalent of nullptr if there are no more related references.
+NodeAddr<RefNode*> DataFlowGraph::getNextRelated(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA) const {
+ assert(IA.Id != 0 && RA.Id != 0);
+
+ auto Related = [RA](NodeAddr<RefNode*> TA) -> bool {
+ if (TA.Addr->getKind() != RA.Addr->getKind())
+ return false;
+ if (TA.Addr->getRegRef() != RA.Addr->getRegRef())
+ return false;
+ return true;
+ };
+ auto RelatedStmt = [&Related,RA](NodeAddr<RefNode*> TA) -> bool {
+ return Related(TA) &&
+ &RA.Addr->getOp() == &TA.Addr->getOp();
+ };
+ auto RelatedPhi = [&Related,RA](NodeAddr<RefNode*> TA) -> bool {
+ if (!Related(TA))
+ return false;
+ if (TA.Addr->getKind() != NodeAttrs::Use)
+ return true;
+ // For phi uses, compare predecessor blocks.
+ const NodeAddr<const PhiUseNode*> TUA = TA;
+ const NodeAddr<const PhiUseNode*> RUA = RA;
+ return TUA.Addr->getPredecessor() == RUA.Addr->getPredecessor();
+ };
+
+ RegisterRef RR = RA.Addr->getRegRef();
+ if (IA.Addr->getKind() == NodeAttrs::Stmt)
+ return RA.Addr->getNextRef(RR, RelatedStmt, true, *this);
+ return RA.Addr->getNextRef(RR, RelatedPhi, true, *this);
+}
+
+// Find the next node related to RA in IA that satisfies condition P.
+// If such a node was found, return a pair where the second element is the
+// located node. If such a node does not exist, return a pair where the
+// first element is the element after which such a node should be inserted,
+// and the second element is a null-address.
+template <typename Predicate>
+std::pair<NodeAddr<RefNode*>,NodeAddr<RefNode*>>
+DataFlowGraph::locateNextRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
+ Predicate P) const {
+ assert(IA.Id != 0 && RA.Id != 0);
+
+ NodeAddr<RefNode*> NA;
+ NodeId Start = RA.Id;
+ while (true) {
+ NA = getNextRelated(IA, RA);
+ if (NA.Id == 0 || NA.Id == Start)
+ break;
+ if (P(NA))
+ break;
+ RA = NA;
+ }
+
+ if (NA.Id != 0 && NA.Id != Start)
+ return std::make_pair(RA, NA);
+ return std::make_pair(RA, NodeAddr<RefNode*>());
+}
+
+// Get the next shadow node in IA corresponding to RA, and optionally create
+// such a node if it does not exist.
+NodeAddr<RefNode*> DataFlowGraph::getNextShadow(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA, bool Create) {
+ assert(IA.Id != 0 && RA.Id != 0);
+
+ uint16_t Flags = RA.Addr->getFlags() | NodeAttrs::Shadow;
+ auto IsShadow = [Flags] (NodeAddr<RefNode*> TA) -> bool {
+ return TA.Addr->getFlags() == Flags;
+ };
+ auto Loc = locateNextRef(IA, RA, IsShadow);
+ if (Loc.second.Id != 0 || !Create)
+ return Loc.second;
+
+ // Create a copy of RA and mark is as shadow.
+ NodeAddr<RefNode*> NA = cloneNode(RA);
+ NA.Addr->setFlags(Flags | NodeAttrs::Shadow);
+ IA.Addr->addMemberAfter(Loc.first, NA, *this);
+ return NA;
+}
+
+// Get the next shadow node in IA corresponding to RA. Return null-address
+// if such a node does not exist.
+NodeAddr<RefNode*> DataFlowGraph::getNextShadow(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA) const {
+ assert(IA.Id != 0 && RA.Id != 0);
+ uint16_t Flags = RA.Addr->getFlags() | NodeAttrs::Shadow;
+ auto IsShadow = [Flags] (NodeAddr<RefNode*> TA) -> bool {
+ return TA.Addr->getFlags() == Flags;
+ };
+ return locateNextRef(IA, RA, IsShadow).second;
+}
+
+// Create a new statement node in the block node BA that corresponds to
+// the machine instruction MI.
+void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
+ auto SA = newStmt(BA, &In);
+
+ // Collect a set of registers that this instruction implicitly uses
+ // or defines. Implicit operands from an instruction will be ignored
+ // unless they are listed here.
+ RegisterSet ImpUses, ImpDefs;
+ if (const uint16_t *ImpD = In.getDesc().getImplicitDefs())
+ while (uint16_t R = *ImpD++)
+ ImpDefs.insert({R, 0});
+ if (const uint16_t *ImpU = In.getDesc().getImplicitUses())
+ while (uint16_t R = *ImpU++)
+ ImpUses.insert({R, 0});
+
+ bool IsCall = In.isCall(), IsReturn = In.isReturn();
+ bool IsPredicated = TII.isPredicated(&In);
+ unsigned NumOps = In.getNumOperands();
+
+ // Avoid duplicate implicit defs. This will not detect cases of implicit
+ // defs that define registers that overlap, but it is not clear how to
+ // interpret that in the absence of explicit defs. Overlapping explicit
+ // defs are likely illegal already.
+ RegisterSet DoneDefs;
+ // Process explicit defs first.
+ for (unsigned OpN = 0; OpN < NumOps; ++OpN) {
+ MachineOperand &Op = In.getOperand(OpN);
+ if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
+ continue;
+ RegisterRef RR = { Op.getReg(), Op.getSubReg() };
+ uint16_t Flags = NodeAttrs::None;
+ if (TOI.isPreserving(In, OpN))
+ Flags |= NodeAttrs::Preserving;
+ if (TOI.isClobbering(In, OpN))
+ Flags |= NodeAttrs::Clobbering;
+ if (TOI.isFixedReg(In, OpN))
+ Flags |= NodeAttrs::Fixed;
+ NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
+ SA.Addr->addMember(DA, *this);
+ DoneDefs.insert(RR);
+ }
+
+ // Process implicit defs, skipping those that have already been added
+ // as explicit.
+ for (unsigned OpN = 0; OpN < NumOps; ++OpN) {
+ MachineOperand &Op = In.getOperand(OpN);
+ if (!Op.isReg() || !Op.isDef() || !Op.isImplicit())
+ continue;
+ RegisterRef RR = { Op.getReg(), Op.getSubReg() };
+ if (!IsCall && !ImpDefs.count(RR))
+ continue;
+ if (DoneDefs.count(RR))
+ continue;
+ uint16_t Flags = NodeAttrs::None;
+ if (TOI.isPreserving(In, OpN))
+ Flags |= NodeAttrs::Preserving;
+ if (TOI.isClobbering(In, OpN))
+ Flags |= NodeAttrs::Clobbering;
+ if (TOI.isFixedReg(In, OpN))
+ Flags |= NodeAttrs::Fixed;
+ NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
+ SA.Addr->addMember(DA, *this);
+ DoneDefs.insert(RR);
+ }
+
+ for (unsigned OpN = 0; OpN < NumOps; ++OpN) {
+ MachineOperand &Op = In.getOperand(OpN);
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ RegisterRef RR = { Op.getReg(), Op.getSubReg() };
+ // Add implicit uses on return and call instructions, and on predicated
+ // instructions regardless of whether or not they appear in the instruction
+ // descriptor's list.
+ bool Implicit = Op.isImplicit();
+ bool TakeImplicit = IsReturn || IsCall || IsPredicated;
+ if (Implicit && !TakeImplicit && !ImpUses.count(RR))
+ continue;
+ uint16_t Flags = NodeAttrs::None;
+ if (TOI.isFixedReg(In, OpN))
+ Flags |= NodeAttrs::Fixed;
+ NodeAddr<UseNode*> UA = newUse(SA, Op, Flags);
+ SA.Addr->addMember(UA, *this);
+ }
+}
+
+// Build a map that for each block will have the set of all references from
+// that block, and from all blocks dominated by it.
+void DataFlowGraph::buildBlockRefs(NodeAddr<BlockNode*> BA,
+ BlockRefsMap &RefM) {
+ auto &Refs = RefM[BA.Id];
+ MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode());
+ assert(N);
+ for (auto I : *N) {
+ MachineBasicBlock *SB = I->getBlock();
+ auto SBA = Func.Addr->findBlock(SB, *this);
+ buildBlockRefs(SBA, RefM);
+ const auto &SRs = RefM[SBA.Id];
+ Refs.insert(SRs.begin(), SRs.end());
+ }
+
+ for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this))
+ for (NodeAddr<RefNode*> RA : IA.Addr->members(*this))
+ Refs.insert(RA.Addr->getRegRef());
+}
+
+// Scan all defs in the block node BA and record in PhiM the locations of
+// phi nodes corresponding to these defs.
+void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, BlockRefsMap &RefM,
+ NodeAddr<BlockNode*> BA) {
+ // Check all defs from block BA and record them in each block in BA's
+ // iterated dominance frontier. This information will later be used to
+ // create phi nodes.
+ MachineBasicBlock *BB = BA.Addr->getCode();
+ assert(BB);
+ auto DFLoc = MDF.find(BB);
+ if (DFLoc == MDF.end() || DFLoc->second.empty())
+ return;
+
+ // Traverse all instructions in the block and collect the set of all
+ // defined references. For each reference there will be a phi created
+ // in the block's iterated dominance frontier.
+ // This is done to make sure that each defined reference gets only one
+ // phi node, even if it is defined multiple times.
+ RegisterSet Defs;
+ for (auto I : BA.Addr->members(*this)) {
+ assert(I.Addr->getType() == NodeAttrs::Code);
+ assert(I.Addr->getKind() == NodeAttrs::Phi ||
+ I.Addr->getKind() == NodeAttrs::Stmt);
+ NodeAddr<InstrNode*> IA = I;
+ for (NodeAddr<RefNode*> RA : IA.Addr->members_if(IsDef, *this))
+ Defs.insert(RA.Addr->getRegRef());
+ }
+
+ // Finally, add the set of defs to each block in the iterated dominance
+ // frontier.
+ const MachineDominanceFrontier::DomSetType &DF = DFLoc->second;
+ SetVector<MachineBasicBlock*> IDF(DF.begin(), DF.end());
+ for (unsigned i = 0; i < IDF.size(); ++i) {
+ auto F = MDF.find(IDF[i]);
+ if (F != MDF.end())
+ IDF.insert(F->second.begin(), F->second.end());
+ }
+
+ // Get the register references that are reachable from this block.
+ RegisterSet &Refs = RefM[BA.Id];
+ for (auto DB : IDF) {
+ auto DBA = Func.Addr->findBlock(DB, *this);
+ const auto &Rs = RefM[DBA.Id];
+ Refs.insert(Rs.begin(), Rs.end());
+ }
+
+ for (auto DB : IDF) {
+ auto DBA = Func.Addr->findBlock(DB, *this);
+ PhiM[DBA.Id].insert(Defs.begin(), Defs.end());
+ }
+}
+
+// Given the locations of phi nodes in the map PhiM, create the phi nodes
+// that are located in the block node BA.
+void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, BlockRefsMap &RefM,
+ NodeAddr<BlockNode*> BA) {
+ // Check if this blocks has any DF defs, i.e. if there are any defs
+ // that this block is in the iterated dominance frontier of.
+ auto HasDF = PhiM.find(BA.Id);
+ if (HasDF == PhiM.end() || HasDF->second.empty())
+ return;
+
+ // First, remove all R in Refs in such that there exists T in Refs
+ // such that T covers R. In other words, only leave those refs that
+ // are not covered by another ref (i.e. maximal with respect to covering).
+
+ auto MaxCoverIn = [this] (RegisterRef RR, RegisterSet &RRs) -> RegisterRef {
+ for (auto I : RRs)
+ if (I != RR && RAI.covers(I, RR))
+ RR = I;
+ return RR;
+ };
+
+ RegisterSet MaxDF;
+ for (auto I : HasDF->second)
+ MaxDF.insert(MaxCoverIn(I, HasDF->second));
+
+ std::vector<RegisterRef> MaxRefs;
+ auto &RefB = RefM[BA.Id];
+ for (auto I : MaxDF)
+ MaxRefs.push_back(MaxCoverIn(I, RefB));
+
+ // Now, for each R in MaxRefs, get the alias closure of R. If the closure
+ // only has R in it, create a phi a def for R. Otherwise, create a phi,
+ // and add a def for each S in the closure.
+
+ // Sort the refs so that the phis will be created in a deterministic order.
+ std::sort(MaxRefs.begin(), MaxRefs.end());
+ // Remove duplicates.
+ auto NewEnd = std::unique(MaxRefs.begin(), MaxRefs.end());
+ MaxRefs.erase(NewEnd, MaxRefs.end());
+
+ auto Aliased = [this,&MaxRefs](RegisterRef RR,
+ std::vector<unsigned> &Closure) -> bool {
+ for (auto I : Closure)
+ if (RAI.alias(RR, MaxRefs[I]))
+ return true;
+ return false;
+ };
+
+ // Prepare a list of NodeIds of the block's predecessors.
+ std::vector<NodeId> PredList;
+ const MachineBasicBlock *MBB = BA.Addr->getCode();
+ for (auto PB : MBB->predecessors()) {
+ auto B = Func.Addr->findBlock(PB, *this);
+ PredList.push_back(B.Id);
+ }
+
+ while (!MaxRefs.empty()) {
+ // Put the first element in the closure, and then add all subsequent
+ // elements from MaxRefs to it, if they alias at least one element
+ // already in the closure.
+ // ClosureIdx: vector of indices in MaxRefs of members of the closure.
+ std::vector<unsigned> ClosureIdx = { 0 };
+ for (unsigned i = 1; i != MaxRefs.size(); ++i)
+ if (Aliased(MaxRefs[i], ClosureIdx))
+ ClosureIdx.push_back(i);
+
+ // Build a phi for the closure.
+ unsigned CS = ClosureIdx.size();
+ NodeAddr<PhiNode*> PA = newPhi(BA);
+
+ // Add defs.
+ for (unsigned X = 0; X != CS; ++X) {
+ RegisterRef RR = MaxRefs[ClosureIdx[X]];
+ uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
+ NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
+ PA.Addr->addMember(DA, *this);
+ }
+ // Add phi uses.
+ for (auto P : PredList) {
+ auto PBA = addr<BlockNode*>(P);
+ for (unsigned X = 0; X != CS; ++X) {
+ RegisterRef RR = MaxRefs[ClosureIdx[X]];
+ NodeAddr<PhiUseNode*> PUA = newPhiUse(PA, RR, PBA);
+ PA.Addr->addMember(PUA, *this);
+ }
+ }
+
+ // Erase from MaxRefs all elements in the closure.
+ auto Begin = MaxRefs.begin();
+ for (unsigned i = ClosureIdx.size(); i != 0; --i)
+ MaxRefs.erase(Begin + ClosureIdx[i-1]);
+ }
+}
+
+// Remove any unneeded phi nodes that were created during the build process.
+void DataFlowGraph::removeUnusedPhis() {
+ // This will remove unused phis, i.e. phis where each def does not reach
+ // any uses or other defs. This will not detect or remove circular phi
+ // chains that are otherwise dead. Unused/dead phis are created during
+ // the build process and this function is intended to remove these cases
+ // that are easily determinable to be unnecessary.
+
+ SetVector<NodeId> PhiQ;
+ for (NodeAddr<BlockNode*> BA : Func.Addr->members(*this)) {
+ for (auto P : BA.Addr->members_if(IsPhi, *this))
+ PhiQ.insert(P.Id);
+ }
+
+ static auto HasUsedDef = [](NodeList &Ms) -> bool {
+ for (auto M : Ms) {
+ if (M.Addr->getKind() != NodeAttrs::Def)
+ continue;
+ NodeAddr<DefNode*> DA = M;
+ if (DA.Addr->getReachedDef() != 0 || DA.Addr->getReachedUse() != 0)
+ return true;
+ }
+ return false;
+ };
+
+ // Any phi, if it is removed, may affect other phis (make them dead).
+ // For each removed phi, collect the potentially affected phis and add
+ // them back to the queue.
+ while (!PhiQ.empty()) {
+ auto PA = addr<PhiNode*>(PhiQ[0]);
+ PhiQ.remove(PA.Id);
+ NodeList Refs = PA.Addr->members(*this);
+ if (HasUsedDef(Refs))
+ continue;
+ for (NodeAddr<RefNode*> RA : Refs) {
+ if (NodeId RD = RA.Addr->getReachingDef()) {
+ auto RDA = addr<DefNode*>(RD);
+ NodeAddr<InstrNode*> OA = RDA.Addr->getOwner(*this);
+ if (IsPhi(OA))
+ PhiQ.insert(OA.Id);
+ }
+ if (RA.Addr->isDef())
+ unlinkDef(RA);
+ else
+ unlinkUse(RA);
+ }
+ NodeAddr<BlockNode*> BA = PA.Addr->getOwner(*this);
+ BA.Addr->removeMember(PA, *this);
+ }
+}
+
+// For a given reference node TA in an instruction node IA, connect the
+// reaching def of TA to the appropriate def node. Create any shadow nodes
+// as appropriate.
+template <typename T>
+void DataFlowGraph::linkRefUp(NodeAddr<InstrNode*> IA, NodeAddr<T> TA,
+ DefStack &DS) {
+ if (DS.empty())
+ return;
+ RegisterRef RR = TA.Addr->getRegRef();
+ NodeAddr<T> TAP;
+
+ // References from the def stack that have been examined so far.
+ RegisterSet Defs;
+
+ for (auto I = DS.top(), E = DS.bottom(); I != E; I.down()) {
+ RegisterRef QR = I->Addr->getRegRef();
+ auto AliasQR = [QR,this] (RegisterRef RR) -> bool {
+ return RAI.alias(QR, RR);
+ };
+ bool PrecUp = RAI.covers(QR, RR);
+ // Skip all defs that are aliased to any of the defs that we have already
+ // seen. If we encounter a covering def, stop the stack traversal early.
+ if (std::any_of(Defs.begin(), Defs.end(), AliasQR)) {
+ if (PrecUp)
+ break;
+ continue;
+ }
+ // The reaching def.
+ NodeAddr<DefNode*> RDA = *I;
+
+ // Pick the reached node.
+ if (TAP.Id == 0) {
+ TAP = TA;
+ } else {
+ // Mark the existing ref as "shadow" and create a new shadow.
+ TAP.Addr->setFlags(TAP.Addr->getFlags() | NodeAttrs::Shadow);
+ TAP = getNextShadow(IA, TAP, true);
+ }
+
+ // Create the link.
+ TAP.Addr->linkToDef(TAP.Id, RDA);
+
+ if (PrecUp)
+ break;
+ Defs.insert(QR);
+ }
+}
+
+// Create data-flow links for all reference nodes in the statement node SA.
+void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA) {
+ RegisterSet Defs;
+
+ // Link all nodes (upwards in the data-flow) with their reaching defs.
+ for (NodeAddr<RefNode*> RA : SA.Addr->members(*this)) {
+ uint16_t Kind = RA.Addr->getKind();
+ assert(Kind == NodeAttrs::Def || Kind == NodeAttrs::Use);
+ RegisterRef RR = RA.Addr->getRegRef();
+ // Do not process multiple defs of the same reference.
+ if (Kind == NodeAttrs::Def && Defs.count(RR))
+ continue;
+ Defs.insert(RR);
+
+ auto F = DefM.find(RR);
+ if (F == DefM.end())
+ continue;
+ DefStack &DS = F->second;
+ if (Kind == NodeAttrs::Use)
+ linkRefUp<UseNode*>(SA, RA, DS);
+ else if (Kind == NodeAttrs::Def)
+ linkRefUp<DefNode*>(SA, RA, DS);
+ else
+ llvm_unreachable("Unexpected node in instruction");
+ }
+}
+
+// Create data-flow links for all instructions in the block node BA. This
+// will include updating any phi nodes in BA.
+void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) {
+ // Push block delimiters.
+ markBlock(BA.Id, DefM);
+
+ // For each non-phi instruction in the block, link all the defs and uses
+ // to their reaching defs. For any member of the block (including phis),
+ // push the defs on the corresponding stacks.
+ for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this)) {
+ // Ignore phi nodes here. They will be linked part by part from the
+ // predecessors.
+ if (IA.Addr->getKind() == NodeAttrs::Stmt)
+ linkStmtRefs(DefM, IA);
+
+ // Push the definitions on the stack.
+ pushDefs(IA, DefM);
+ }
+
+ // Recursively process all children in the dominator tree.
+ MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode());
+ for (auto I : *N) {
+ MachineBasicBlock *SB = I->getBlock();
+ auto SBA = Func.Addr->findBlock(SB, *this);
+ linkBlockRefs(DefM, SBA);
+ }
+
+ // Link the phi uses from the successor blocks.
+ auto IsUseForBA = [BA](NodeAddr<NodeBase*> NA) -> bool {
+ if (NA.Addr->getKind() != NodeAttrs::Use)
+ return false;
+ assert(NA.Addr->getFlags() & NodeAttrs::PhiRef);
+ NodeAddr<PhiUseNode*> PUA = NA;
+ return PUA.Addr->getPredecessor() == BA.Id;
+ };
+ MachineBasicBlock *MBB = BA.Addr->getCode();
+ for (auto SB : MBB->successors()) {
+ auto SBA = Func.Addr->findBlock(SB, *this);
+ for (NodeAddr<InstrNode*> IA : SBA.Addr->members_if(IsPhi, *this)) {
+ // Go over each phi use associated with MBB, and link it.
+ for (auto U : IA.Addr->members_if(IsUseForBA, *this)) {
+ NodeAddr<PhiUseNode*> PUA = U;
+ RegisterRef RR = PUA.Addr->getRegRef();
+ linkRefUp<UseNode*>(IA, PUA, DefM[RR]);
+ }
+ }
+ }
+
+ // Pop all defs from this block from the definition stacks.
+ releaseBlock(BA.Id, DefM);
+}
+
+// Remove the use node UA from any data-flow and structural links.
+void DataFlowGraph::unlinkUse(NodeAddr<UseNode*> UA) {
+ NodeId RD = UA.Addr->getReachingDef();
+ NodeId Sib = UA.Addr->getSibling();
+
+ NodeAddr<InstrNode*> IA = UA.Addr->getOwner(*this);
+ IA.Addr->removeMember(UA, *this);
+
+ if (RD == 0) {
+ assert(Sib == 0);
+ return;
+ }
+
+ auto RDA = addr<DefNode*>(RD);
+ auto TA = addr<UseNode*>(RDA.Addr->getReachedUse());
+ if (TA.Id == UA.Id) {
+ RDA.Addr->setReachedUse(Sib);
+ return;
+ }
+
+ while (TA.Id != 0) {
+ NodeId S = TA.Addr->getSibling();
+ if (S == UA.Id) {
+ TA.Addr->setSibling(UA.Addr->getSibling());
+ return;
+ }
+ TA = addr<UseNode*>(S);
+ }
+}
+
+// Remove the def node DA from any data-flow and structural links.
+void DataFlowGraph::unlinkDef(NodeAddr<DefNode*> DA) {
+ //
+ // RD
+ // | reached
+ // | def
+ // :
+ // .
+ // +----+
+ // ... -- | DA | -- ... -- 0 : sibling chain of DA
+ // +----+
+ // | | reached
+ // | : def
+ // | .
+ // | ... : Siblings (defs)
+ // |
+ // : reached
+ // . use
+ // ... : sibling chain of reached uses
+
+ NodeId RD = DA.Addr->getReachingDef();
+
+ // Visit all siblings of the reached def and reset their reaching defs.
+ // Also, defs reached by DA are now "promoted" to being reached by RD,
+ // so all of them will need to be spliced into the sibling chain where
+ // DA belongs.
+ auto getAllNodes = [this] (NodeId N) -> NodeList {
+ NodeList Res;
+ while (N) {
+ auto RA = addr<RefNode*>(N);
+ // Keep the nodes in the exact sibling order.
+ Res.push_back(RA);
+ N = RA.Addr->getSibling();
+ }
+ return Res;
+ };
+ NodeList ReachedDefs = getAllNodes(DA.Addr->getReachedDef());
+ NodeList ReachedUses = getAllNodes(DA.Addr->getReachedUse());
+
+ if (RD == 0) {
+ for (NodeAddr<RefNode*> I : ReachedDefs)
+ I.Addr->setSibling(0);
+ for (NodeAddr<RefNode*> I : ReachedUses)
+ I.Addr->setSibling(0);
+ }
+ for (NodeAddr<DefNode*> I : ReachedDefs)
+ I.Addr->setReachingDef(RD);
+ for (NodeAddr<UseNode*> I : ReachedUses)
+ I.Addr->setReachingDef(RD);
+
+ NodeId Sib = DA.Addr->getSibling();
+ if (RD == 0) {
+ assert(Sib == 0);
+ return;
+ }
+
+ // Update the reaching def node and remove DA from the sibling list.
+ auto RDA = addr<DefNode*>(RD);
+ auto TA = addr<DefNode*>(RDA.Addr->getReachedDef());
+ if (TA.Id == DA.Id) {
+ // If DA is the first reached def, just update the RD's reached def
+ // to the DA's sibling.
+ RDA.Addr->setReachedDef(Sib);
+ } else {
+ // Otherwise, traverse the sibling list of the reached defs and remove
+ // DA from it.
+ while (TA.Id != 0) {
+ NodeId S = TA.Addr->getSibling();
+ if (S == DA.Id) {
+ TA.Addr->setSibling(Sib);
+ break;
+ }
+ TA = addr<DefNode*>(S);
+ }
+ }
+
+ // Splice the DA's reached defs into the RDA's reached def chain.
+ if (!ReachedDefs.empty()) {
+ auto Last = NodeAddr<DefNode*>(ReachedDefs.back());
+ Last.Addr->setSibling(RDA.Addr->getReachedDef());
+ RDA.Addr->setReachedDef(ReachedDefs.front().Id);
+ }
+ // Splice the DA's reached uses into the RDA's reached use chain.
+ if (!ReachedUses.empty()) {
+ auto Last = NodeAddr<UseNode*>(ReachedUses.back());
+ Last.Addr->setSibling(RDA.Addr->getReachedUse());
+ RDA.Addr->setReachedUse(ReachedUses.front().Id);
+ }
+
+ NodeAddr<InstrNode*> IA = DA.Addr->getOwner(*this);
+ IA.Addr->removeMember(DA, *this);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFGraph.h b/contrib/llvm/lib/Target/Hexagon/RDFGraph.h
new file mode 100644
index 0000000..7da7bb5
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/RDFGraph.h
@@ -0,0 +1,841 @@
+//===--- RDFGraph.h -------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Target-independent, SSA-based data flow graph for register data flow (RDF)
+// for a non-SSA program representation (e.g. post-RA machine code).
+//
+//
+// *** Introduction
+//
+// The RDF graph is a collection of nodes, each of which denotes some element
+// of the program. There are two main types of such elements: code and refe-
+// rences. Conceptually, "code" is something that represents the structure
+// of the program, e.g. basic block or a statement, while "reference" is an
+// instance of accessing a register, e.g. a definition or a use. Nodes are
+// connected with each other based on the structure of the program (such as
+// blocks, instructions, etc.), and based on the data flow (e.g. reaching
+// definitions, reached uses, etc.). The single-reaching-definition principle
+// of SSA is generally observed, although, due to the non-SSA representation
+// of the program, there are some differences between the graph and a "pure"
+// SSA representation.
+//
+//
+// *** Implementation remarks
+//
+// Since the graph can contain a large number of nodes, memory consumption
+// was one of the major design considerations. As a result, there is a single
+// base class NodeBase which defines all members used by all possible derived
+// classes. The members are arranged in a union, and a derived class cannot
+// add any data members of its own. Each derived class only defines the
+// functional interface, i.e. member functions. NodeBase must be a POD,
+// which implies that all of its members must also be PODs.
+// Since nodes need to be connected with other nodes, pointers have been
+// replaced with 32-bit identifiers: each node has an id of type NodeId.
+// There are mapping functions in the graph that translate between actual
+// memory addresses and the corresponding identifiers.
+// A node id of 0 is equivalent to nullptr.
+//
+//
+// *** Structure of the graph
+//
+// A code node is always a collection of other nodes. For example, a code
+// node corresponding to a basic block will contain code nodes corresponding
+// to instructions. In turn, a code node corresponding to an instruction will
+// contain a list of reference nodes that correspond to the definitions and
+// uses of registers in that instruction. The members are arranged into a
+// circular list, which is yet another consequence of the effort to save
+// memory: for each member node it should be possible to obtain its owner,
+// and it should be possible to access all other members. There are other
+// ways to accomplish that, but the circular list seemed the most natural.
+//
+// +- CodeNode -+
+// | | <---------------------------------------------------+
+// +-+--------+-+ |
+// |FirstM |LastM |
+// | +-------------------------------------+ |
+// | | |
+// V V |
+// +----------+ Next +----------+ Next Next +----------+ Next |
+// | |----->| |-----> ... ----->| |----->-+
+// +- Member -+ +- Member -+ +- Member -+
+//
+// The order of members is such that related reference nodes (see below)
+// should be contiguous on the member list.
+//
+// A reference node is a node that encapsulates an access to a register,
+// in other words, data flowing into or out of a register. There are two
+// major kinds of reference nodes: defs and uses. A def node will contain
+// the id of the first reached use, and the id of the first reached def.
+// Each def and use will contain the id of the reaching def, and also the
+// id of the next reached def (for def nodes) or use (for use nodes).
+// The "next node sharing the same reaching def" is denoted as "sibling".
+// In summary:
+// - Def node contains: reaching def, sibling, first reached def, and first
+// reached use.
+// - Use node contains: reaching def and sibling.
+//
+// +-- DefNode --+
+// | R2 = ... | <---+--------------------+
+// ++---------+--+ | |
+// |Reached |Reached | |
+// |Def |Use | |
+// | | |Reaching |Reaching
+// | V |Def |Def
+// | +-- UseNode --+ Sib +-- UseNode --+ Sib Sib
+// | | ... = R2 |----->| ... = R2 |----> ... ----> 0
+// | +-------------+ +-------------+
+// V
+// +-- DefNode --+ Sib
+// | R2 = ... |----> ...
+// ++---------+--+
+// | |
+// | |
+// ... ...
+//
+// To get a full picture, the circular lists connecting blocks within a
+// function, instructions within a block, etc. should be superimposed with
+// the def-def, def-use links shown above.
+// To illustrate this, consider a small example in a pseudo-assembly:
+// foo:
+// add r2, r0, r1 ; r2 = r0+r1
+// addi r0, r2, 1 ; r0 = r2+1
+// ret r0 ; return value in r0
+//
+// The graph (in a format used by the debugging functions) would look like:
+//
+// DFG dump:[
+// f1: Function foo
+// b2: === BB#0 === preds(0), succs(0):
+// p3: phi [d4<r0>(,d12,u9):]
+// p5: phi [d6<r1>(,,u10):]
+// s7: add [d8<r2>(,,u13):, u9<r0>(d4):, u10<r1>(d6):]
+// s11: addi [d12<r0>(d4,,u15):, u13<r2>(d8):]
+// s14: ret [u15<r0>(d12):]
+// ]
+//
+// The f1, b2, p3, etc. are node ids. The letter is prepended to indicate the
+// kind of the node (i.e. f - function, b - basic block, p - phi, s - state-
+// ment, d - def, u - use).
+// The format of a def node is:
+// dN<R>(rd,d,u):sib,
+// where
+// N - numeric node id,
+// R - register being defined
+// rd - reaching def,
+// d - reached def,
+// u - reached use,
+// sib - sibling.
+// The format of a use node is:
+// uN<R>[!](rd):sib,
+// where
+// N - numeric node id,
+// R - register being used,
+// rd - reaching def,
+// sib - sibling.
+// Possible annotations (usually preceding the node id):
+// + - preserving def,
+// ~ - clobbering def,
+// " - shadow ref (follows the node id),
+// ! - fixed register (appears after register name).
+//
+// The circular lists are not explicit in the dump.
+//
+//
+// *** Node attributes
+//
+// NodeBase has a member "Attrs", which is the primary way of determining
+// the node's characteristics. The fields in this member decide whether
+// the node is a code node or a reference node (i.e. node's "type"), then
+// within each type, the "kind" determines what specifically this node
+// represents. The remaining bits, "flags", contain additional information
+// that is even more detailed than the "kind".
+// CodeNode's kinds are:
+// - Phi: Phi node, members are reference nodes.
+// - Stmt: Statement, members are reference nodes.
+// - Block: Basic block, members are instruction nodes (i.e. Phi or Stmt).
+// - Func: The whole function. The members are basic block nodes.
+// RefNode's kinds are:
+// - Use.
+// - Def.
+//
+// Meaning of flags:
+// - Preserving: applies only to defs. A preserving def is one that can
+// preserve some of the original bits among those that are included in
+// the register associated with that def. For example, if R0 is a 32-bit
+// register, but a def can only change the lower 16 bits, then it will
+// be marked as preserving.
+// - Shadow: a reference that has duplicates holding additional reaching
+// defs (see more below).
+// - Clobbering: applied only to defs, indicates that the value generated
+// by this def is unspecified. A typical example would be volatile registers
+// after function calls.
+//
+//
+// *** Shadow references
+//
+// It may happen that a super-register can have two (or more) non-overlapping
+// sub-registers. When both of these sub-registers are defined and followed
+// by a use of the super-register, the use of the super-register will not
+// have a unique reaching def: both defs of the sub-registers need to be
+// accounted for. In such cases, a duplicate use of the super-register is
+// added and it points to the extra reaching def. Both uses are marked with
+// a flag "shadow". Example:
+// Assume t0 is a super-register of r0 and r1, r0 and r1 do not overlap:
+// set r0, 1 ; r0 = 1
+// set r1, 1 ; r1 = 1
+// addi t1, t0, 1 ; t1 = t0+1
+//
+// The DFG:
+// s1: set [d2<r0>(,,u9):]
+// s3: set [d4<r1>(,,u10):]
+// s5: addi [d6<t1>(,,):, u7"<t0>(d2):, u8"<t0>(d4):]
+//
+// The statement s5 has two use nodes for t0: u7" and u9". The quotation
+// mark " indicates that the node is a shadow.
+//
+#ifndef RDF_GRAPH_H
+#define RDF_GRAPH_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+#include <functional>
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+namespace llvm {
+ class MachineBasicBlock;
+ class MachineFunction;
+ class MachineInstr;
+ class MachineOperand;
+ class MachineDominanceFrontier;
+ class MachineDominatorTree;
+ class TargetInstrInfo;
+ class TargetRegisterInfo;
+}
+
+namespace rdf {
+ typedef uint32_t NodeId;
+
+ struct NodeAttrs {
+ enum : uint16_t {
+ None = 0x0000, // Nothing
+
+ // Types: 2 bits
+ TypeMask = 0x0003,
+ Code = 0x0001, // 01, Container
+ Ref = 0x0002, // 10, Reference
+
+ // Kind: 3 bits
+ KindMask = 0x0007 << 2,
+ Def = 0x0001 << 2, // 001
+ Use = 0x0002 << 2, // 010
+ Phi = 0x0003 << 2, // 011
+ Stmt = 0x0004 << 2, // 100
+ Block = 0x0005 << 2, // 101
+ Func = 0x0006 << 2, // 110
+
+ // Flags: 5 bits for now
+ FlagMask = 0x001F << 5,
+ Shadow = 0x0001 << 5, // 00001, Has extra reaching defs.
+ Clobbering = 0x0002 << 5, // 00010, Produces unspecified values.
+ PhiRef = 0x0004 << 5, // 00100, Member of PhiNode.
+ Preserving = 0x0008 << 5, // 01000, Def can keep original bits.
+ Fixed = 0x0010 << 5, // 10000, Fixed register.
+ };
+
+ static uint16_t type(uint16_t T) { return T & TypeMask; }
+ static uint16_t kind(uint16_t T) { return T & KindMask; }
+ static uint16_t flags(uint16_t T) { return T & FlagMask; }
+
+ static uint16_t set_type(uint16_t A, uint16_t T) {
+ return (A & ~TypeMask) | T;
+ }
+ static uint16_t set_kind(uint16_t A, uint16_t K) {
+ return (A & ~KindMask) | K;
+ }
+ static uint16_t set_flags(uint16_t A, uint16_t F) {
+ return (A & ~FlagMask) | F;
+ }
+
+ // Test if A contains B.
+ static bool contains(uint16_t A, uint16_t B) {
+ if (type(A) != Code)
+ return false;
+ uint16_t KB = kind(B);
+ switch (kind(A)) {
+ case Func:
+ return KB == Block;
+ case Block:
+ return KB == Phi || KB == Stmt;
+ case Phi:
+ case Stmt:
+ return type(B) == Ref;
+ }
+ return false;
+ }
+ };
+
+ template <typename T> struct NodeAddr {
+ NodeAddr() : Addr(nullptr), Id(0) {}
+ NodeAddr(T A, NodeId I) : Addr(A), Id(I) {}
+ NodeAddr(const NodeAddr&) = default;
+ NodeAddr &operator= (const NodeAddr&) = default;
+
+ bool operator== (const NodeAddr<T> &NA) const {
+ assert((Addr == NA.Addr) == (Id == NA.Id));
+ return Addr == NA.Addr;
+ }
+ bool operator!= (const NodeAddr<T> &NA) const {
+ return !operator==(NA);
+ }
+ // Type cast (casting constructor). The reason for having this class
+ // instead of std::pair.
+ template <typename S> NodeAddr(const NodeAddr<S> &NA)
+ : Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
+
+ T Addr;
+ NodeId Id;
+ };
+
+ struct NodeBase;
+
+ // Fast memory allocation and translation between node id and node address.
+ // This is really the same idea as the one underlying the "bump pointer
+ // allocator", the difference being in the translation. A node id is
+ // composed of two components: the index of the block in which it was
+ // allocated, and the index within the block. With the default settings,
+ // where the number of nodes per block is 4096, the node id (minus 1) is:
+ //
+ // bit position: 11 0
+ // +----------------------------+--------------+
+ // | Index of the block |Index in block|
+ // +----------------------------+--------------+
+ //
+ // The actual node id is the above plus 1, to avoid creating a node id of 0.
+ //
+ // This method significantly improved the build time, compared to using maps
+ // (std::unordered_map or DenseMap) to translate between pointers and ids.
+ struct NodeAllocator {
+ // Amount of storage for a single node.
+ enum { NodeMemSize = 32 };
+ NodeAllocator(uint32_t NPB = 4096)
+ : NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)),
+ IndexMask((1 << BitsPerIndex)-1), ActiveEnd(nullptr) {
+ assert(isPowerOf2_32(NPB));
+ }
+ NodeBase *ptr(NodeId N) const {
+ uint32_t N1 = N-1;
+ uint32_t BlockN = N1 >> BitsPerIndex;
+ uint32_t Offset = (N1 & IndexMask) * NodeMemSize;
+ return reinterpret_cast<NodeBase*>(Blocks[BlockN]+Offset);
+ }
+ NodeId id(const NodeBase *P) const;
+ NodeAddr<NodeBase*> New();
+ void clear();
+
+ private:
+ void startNewBlock();
+ bool needNewBlock();
+ uint32_t makeId(uint32_t Block, uint32_t Index) const {
+ // Add 1 to the id, to avoid the id of 0, which is treated as "null".
+ return ((Block << BitsPerIndex) | Index) + 1;
+ }
+
+ const uint32_t NodesPerBlock;
+ const uint32_t BitsPerIndex;
+ const uint32_t IndexMask;
+ char *ActiveEnd;
+ std::vector<char*> Blocks;
+ typedef BumpPtrAllocatorImpl<MallocAllocator, 65536> AllocatorTy;
+ AllocatorTy MemPool;
+ };
+
+ struct RegisterRef {
+ unsigned Reg, Sub;
+
+ // No non-trivial constructors, since this will be a member of a union.
+ RegisterRef() = default;
+ RegisterRef(const RegisterRef &RR) = default;
+ RegisterRef &operator= (const RegisterRef &RR) = default;
+ bool operator== (const RegisterRef &RR) const {
+ return Reg == RR.Reg && Sub == RR.Sub;
+ }
+ bool operator!= (const RegisterRef &RR) const {
+ return !operator==(RR);
+ }
+ bool operator< (const RegisterRef &RR) const {
+ return Reg < RR.Reg || (Reg == RR.Reg && Sub < RR.Sub);
+ }
+ };
+ typedef std::set<RegisterRef> RegisterSet;
+
+ struct RegisterAliasInfo {
+ RegisterAliasInfo(const TargetRegisterInfo &tri) : TRI(tri) {}
+ virtual ~RegisterAliasInfo() {}
+
+ virtual std::vector<RegisterRef> getAliasSet(RegisterRef RR) const;
+ virtual bool alias(RegisterRef RA, RegisterRef RB) const;
+ virtual bool covers(RegisterRef RA, RegisterRef RB) const;
+ virtual bool covers(const RegisterSet &RRs, RegisterRef RR) const;
+
+ const TargetRegisterInfo &TRI;
+ };
+
+ struct TargetOperandInfo {
+ TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {}
+ virtual ~TargetOperandInfo() {}
+ virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const;
+ virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const;
+ virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const;
+
+ const TargetInstrInfo &TII;
+ };
+
+
+ struct DataFlowGraph;
+
+ struct NodeBase {
+ public:
+ // Make sure this is a POD.
+ NodeBase() = default;
+ uint16_t getType() const { return NodeAttrs::type(Attrs); }
+ uint16_t getKind() const { return NodeAttrs::kind(Attrs); }
+ uint16_t getFlags() const { return NodeAttrs::flags(Attrs); }
+ NodeId getNext() const { return Next; }
+
+ uint16_t getAttrs() const { return Attrs; }
+ void setAttrs(uint16_t A) { Attrs = A; }
+ void setFlags(uint16_t F) { setAttrs(NodeAttrs::set_flags(getAttrs(), F)); }
+
+ // Insert node NA after "this" in the circular chain.
+ void append(NodeAddr<NodeBase*> NA);
+ // Initialize all members to 0.
+ void init() { memset(this, 0, sizeof *this); }
+ void setNext(NodeId N) { Next = N; }
+
+ protected:
+ uint16_t Attrs;
+ uint16_t Reserved;
+ NodeId Next; // Id of the next node in the circular chain.
+ // Definitions of nested types. Using anonymous nested structs would make
+ // this class definition clearer, but unnamed structs are not a part of
+ // the standard.
+ struct Def_struct {
+ NodeId DD, DU; // Ids of the first reached def and use.
+ };
+ struct PhiU_struct {
+ NodeId PredB; // Id of the predecessor block for a phi use.
+ };
+ struct Code_struct {
+ void *CP; // Pointer to the actual code.
+ NodeId FirstM, LastM; // Id of the first member and last.
+ };
+ struct Ref_struct {
+ NodeId RD, Sib; // Ids of the reaching def and the sibling.
+ union {
+ Def_struct Def;
+ PhiU_struct PhiU;
+ };
+ union {
+ MachineOperand *Op; // Non-phi refs point to a machine operand.
+ RegisterRef RR; // Phi refs store register info directly.
+ };
+ };
+
+ // The actual payload.
+ union {
+ Ref_struct Ref;
+ Code_struct Code;
+ };
+ };
+ // The allocator allocates chunks of 32 bytes for each node. The fact that
+ // each node takes 32 bytes in memory is used for fast translation between
+ // the node id and the node address.
+ static_assert(sizeof(NodeBase) <= NodeAllocator::NodeMemSize,
+ "NodeBase must be at most NodeAllocator::NodeMemSize bytes");
+
+ typedef std::vector<NodeAddr<NodeBase*>> NodeList;
+ typedef std::set<NodeId> NodeSet;
+
+ struct RefNode : public NodeBase {
+ RefNode() = default;
+ RegisterRef getRegRef() const;
+ MachineOperand &getOp() {
+ assert(!(getFlags() & NodeAttrs::PhiRef));
+ return *Ref.Op;
+ }
+ void setRegRef(RegisterRef RR);
+ void setRegRef(MachineOperand *Op);
+ NodeId getReachingDef() const {
+ return Ref.RD;
+ }
+ void setReachingDef(NodeId RD) {
+ Ref.RD = RD;
+ }
+ NodeId getSibling() const {
+ return Ref.Sib;
+ }
+ void setSibling(NodeId Sib) {
+ Ref.Sib = Sib;
+ }
+ bool isUse() const {
+ assert(getType() == NodeAttrs::Ref);
+ return getKind() == NodeAttrs::Use;
+ }
+ bool isDef() const {
+ assert(getType() == NodeAttrs::Ref);
+ return getKind() == NodeAttrs::Def;
+ }
+
+ template <typename Predicate>
+ NodeAddr<RefNode*> getNextRef(RegisterRef RR, Predicate P, bool NextOnly,
+ const DataFlowGraph &G);
+ NodeAddr<NodeBase*> getOwner(const DataFlowGraph &G);
+ };
+
+ struct DefNode : public RefNode {
+ NodeId getReachedDef() const {
+ return Ref.Def.DD;
+ }
+ void setReachedDef(NodeId D) {
+ Ref.Def.DD = D;
+ }
+ NodeId getReachedUse() const {
+ return Ref.Def.DU;
+ }
+ void setReachedUse(NodeId U) {
+ Ref.Def.DU = U;
+ }
+
+ void linkToDef(NodeId Self, NodeAddr<DefNode*> DA);
+ };
+
+ struct UseNode : public RefNode {
+ void linkToDef(NodeId Self, NodeAddr<DefNode*> DA);
+ };
+
+ struct PhiUseNode : public UseNode {
+ NodeId getPredecessor() const {
+ assert(getFlags() & NodeAttrs::PhiRef);
+ return Ref.PhiU.PredB;
+ }
+ void setPredecessor(NodeId B) {
+ assert(getFlags() & NodeAttrs::PhiRef);
+ Ref.PhiU.PredB = B;
+ }
+ };
+
+ struct CodeNode : public NodeBase {
+ template <typename T> T getCode() const {
+ return static_cast<T>(Code.CP);
+ }
+ void setCode(void *C) {
+ Code.CP = C;
+ }
+
+ NodeAddr<NodeBase*> getFirstMember(const DataFlowGraph &G) const;
+ NodeAddr<NodeBase*> getLastMember(const DataFlowGraph &G) const;
+ void addMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G);
+ void addMemberAfter(NodeAddr<NodeBase*> MA, NodeAddr<NodeBase*> NA,
+ const DataFlowGraph &G);
+ void removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G);
+
+ NodeList members(const DataFlowGraph &G) const;
+ template <typename Predicate>
+ NodeList members_if(Predicate P, const DataFlowGraph &G) const;
+ };
+
+ struct InstrNode : public CodeNode {
+ NodeAddr<NodeBase*> getOwner(const DataFlowGraph &G);
+ };
+
+ struct PhiNode : public InstrNode {
+ MachineInstr *getCode() const {
+ return nullptr;
+ }
+ };
+
+ struct StmtNode : public InstrNode {
+ MachineInstr *getCode() const {
+ return CodeNode::getCode<MachineInstr*>();
+ }
+ };
+
+ struct BlockNode : public CodeNode {
+ MachineBasicBlock *getCode() const {
+ return CodeNode::getCode<MachineBasicBlock*>();
+ }
+ void addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G);
+ };
+
+ struct FuncNode : public CodeNode {
+ MachineFunction *getCode() const {
+ return CodeNode::getCode<MachineFunction*>();
+ }
+ NodeAddr<BlockNode*> findBlock(const MachineBasicBlock *BB,
+ const DataFlowGraph &G) const;
+ NodeAddr<BlockNode*> getEntryBlock(const DataFlowGraph &G);
+ };
+
+ struct DataFlowGraph {
+ DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
+ const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
+ const MachineDominanceFrontier &mdf, const RegisterAliasInfo &rai,
+ const TargetOperandInfo &toi);
+
+ NodeBase *ptr(NodeId N) const;
+ template <typename T> T ptr(NodeId N) const {
+ return static_cast<T>(ptr(N));
+ }
+ NodeId id(const NodeBase *P) const;
+
+ template <typename T> NodeAddr<T> addr(NodeId N) const {
+ return { ptr<T>(N), N };
+ }
+
+ NodeAddr<FuncNode*> getFunc() const {
+ return Func;
+ }
+ MachineFunction &getMF() const {
+ return MF;
+ }
+ const TargetInstrInfo &getTII() const {
+ return TII;
+ }
+ const TargetRegisterInfo &getTRI() const {
+ return TRI;
+ }
+ const MachineDominatorTree &getDT() const {
+ return MDT;
+ }
+ const MachineDominanceFrontier &getDF() const {
+ return MDF;
+ }
+ const RegisterAliasInfo &getRAI() const {
+ return RAI;
+ }
+
+ struct DefStack {
+ DefStack() = default;
+ bool empty() const { return Stack.empty() || top() == bottom(); }
+ private:
+ typedef NodeAddr<DefNode*> value_type;
+ struct Iterator {
+ typedef DefStack::value_type value_type;
+ Iterator &up() { Pos = DS.nextUp(Pos); return *this; }
+ Iterator &down() { Pos = DS.nextDown(Pos); return *this; }
+ value_type operator*() const {
+ assert(Pos >= 1);
+ return DS.Stack[Pos-1];
+ }
+ const value_type *operator->() const {
+ assert(Pos >= 1);
+ return &DS.Stack[Pos-1];
+ }
+ bool operator==(const Iterator &It) const { return Pos == It.Pos; }
+ bool operator!=(const Iterator &It) const { return Pos != It.Pos; }
+ private:
+ Iterator(const DefStack &S, bool Top);
+ // Pos-1 is the index in the StorageType object that corresponds to
+ // the top of the DefStack.
+ const DefStack &DS;
+ unsigned Pos;
+ friend struct DefStack;
+ };
+ public:
+ typedef Iterator iterator;
+ iterator top() const { return Iterator(*this, true); }
+ iterator bottom() const { return Iterator(*this, false); }
+ unsigned size() const;
+
+ void push(NodeAddr<DefNode*> DA) { Stack.push_back(DA); }
+ void pop();
+ void start_block(NodeId N);
+ void clear_block(NodeId N);
+ private:
+ friend struct Iterator;
+ typedef std::vector<value_type> StorageType;
+ bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const {
+ return (P.Addr == nullptr) && (N == 0 || P.Id == N);
+ }
+ unsigned nextUp(unsigned P) const;
+ unsigned nextDown(unsigned P) const;
+ StorageType Stack;
+ };
+
+ typedef std::map<RegisterRef,DefStack> DefStackMap;
+
+ void build();
+ void pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM);
+ void markBlock(NodeId B, DefStackMap &DefM);
+ void releaseBlock(NodeId B, DefStackMap &DefM);
+
+ NodeAddr<RefNode*> getNextRelated(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA) const;
+ NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA, bool Create);
+ NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA) const;
+ NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA, bool Create);
+ NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA) const;
+
+ NodeList getRelatedRefs(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA) const;
+
+ void unlinkUse(NodeAddr<UseNode*> UA);
+ void unlinkDef(NodeAddr<DefNode*> DA);
+
+ // Some useful filters.
+ template <uint16_t Kind>
+ static bool IsRef(const NodeAddr<NodeBase*> BA) {
+ return BA.Addr->getType() == NodeAttrs::Ref &&
+ BA.Addr->getKind() == Kind;
+ }
+ template <uint16_t Kind>
+ static bool IsCode(const NodeAddr<NodeBase*> BA) {
+ return BA.Addr->getType() == NodeAttrs::Code &&
+ BA.Addr->getKind() == Kind;
+ }
+ static bool IsDef(const NodeAddr<NodeBase*> BA) {
+ return BA.Addr->getType() == NodeAttrs::Ref &&
+ BA.Addr->getKind() == NodeAttrs::Def;
+ }
+ static bool IsUse(const NodeAddr<NodeBase*> BA) {
+ return BA.Addr->getType() == NodeAttrs::Ref &&
+ BA.Addr->getKind() == NodeAttrs::Use;
+ }
+ static bool IsPhi(const NodeAddr<NodeBase*> BA) {
+ return BA.Addr->getType() == NodeAttrs::Code &&
+ BA.Addr->getKind() == NodeAttrs::Phi;
+ }
+
+ private:
+ void reset();
+
+ NodeAddr<NodeBase*> newNode(uint16_t Attrs);
+ NodeAddr<NodeBase*> cloneNode(const NodeAddr<NodeBase*> B);
+ NodeAddr<UseNode*> newUse(NodeAddr<InstrNode*> Owner,
+ MachineOperand &Op, uint16_t Flags = NodeAttrs::None);
+ NodeAddr<PhiUseNode*> newPhiUse(NodeAddr<PhiNode*> Owner,
+ RegisterRef RR, NodeAddr<BlockNode*> PredB,
+ uint16_t Flags = NodeAttrs::PhiRef);
+ NodeAddr<DefNode*> newDef(NodeAddr<InstrNode*> Owner,
+ MachineOperand &Op, uint16_t Flags = NodeAttrs::None);
+ NodeAddr<DefNode*> newDef(NodeAddr<InstrNode*> Owner,
+ RegisterRef RR, uint16_t Flags = NodeAttrs::PhiRef);
+ NodeAddr<PhiNode*> newPhi(NodeAddr<BlockNode*> Owner);
+ NodeAddr<StmtNode*> newStmt(NodeAddr<BlockNode*> Owner,
+ MachineInstr *MI);
+ NodeAddr<BlockNode*> newBlock(NodeAddr<FuncNode*> Owner,
+ MachineBasicBlock *BB);
+ NodeAddr<FuncNode*> newFunc(MachineFunction *MF);
+
+ template <typename Predicate>
+ std::pair<NodeAddr<RefNode*>,NodeAddr<RefNode*>>
+ locateNextRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
+ Predicate P) const;
+
+ typedef std::map<NodeId,RegisterSet> BlockRefsMap;
+
+ void buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In);
+ void buildBlockRefs(NodeAddr<BlockNode*> BA, BlockRefsMap &RefM);
+ void recordDefsForDF(BlockRefsMap &PhiM, BlockRefsMap &RefM,
+ NodeAddr<BlockNode*> BA);
+ void buildPhis(BlockRefsMap &PhiM, BlockRefsMap &RefM,
+ NodeAddr<BlockNode*> BA);
+ void removeUnusedPhis();
+
+ template <typename T> void linkRefUp(NodeAddr<InstrNode*> IA,
+ NodeAddr<T> TA, DefStack &DS);
+ void linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA);
+ void linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA);
+
+ TimerGroup TimeG;
+ NodeAddr<FuncNode*> Func;
+ NodeAllocator Memory;
+
+ MachineFunction &MF;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const MachineDominatorTree &MDT;
+ const MachineDominanceFrontier &MDF;
+ const RegisterAliasInfo &RAI;
+ const TargetOperandInfo &TOI;
+ }; // struct DataFlowGraph
+
+ template <typename Predicate>
+ NodeAddr<RefNode*> RefNode::getNextRef(RegisterRef RR, Predicate P,
+ bool NextOnly, const DataFlowGraph &G) {
+ // Get the "Next" reference in the circular list that references RR and
+ // satisfies predicate "Pred".
+ auto NA = G.addr<NodeBase*>(getNext());
+
+ while (NA.Addr != this) {
+ if (NA.Addr->getType() == NodeAttrs::Ref) {
+ NodeAddr<RefNode*> RA = NA;
+ if (RA.Addr->getRegRef() == RR && P(NA))
+ return NA;
+ if (NextOnly)
+ break;
+ NA = G.addr<NodeBase*>(NA.Addr->getNext());
+ } else {
+ // We've hit the beginning of the chain.
+ assert(NA.Addr->getType() == NodeAttrs::Code);
+ NodeAddr<CodeNode*> CA = NA;
+ NA = CA.Addr->getFirstMember(G);
+ }
+ }
+ // Return the equivalent of "nullptr" if such a node was not found.
+ return NodeAddr<RefNode*>();
+ }
+
+ template <typename Predicate>
+ NodeList CodeNode::members_if(Predicate P, const DataFlowGraph &G) const {
+ NodeList MM;
+ auto M = getFirstMember(G);
+ if (M.Id == 0)
+ return MM;
+
+ while (M.Addr != this) {
+ if (P(M))
+ MM.push_back(M);
+ M = G.addr<NodeBase*>(M.Addr->getNext());
+ }
+ return MM;
+ }
+
+
+ template <typename T> struct Print;
+ template <typename T>
+ raw_ostream &operator<< (raw_ostream &OS, const Print<T> &P);
+
+ template <typename T>
+ struct Print {
+ Print(const T &x, const DataFlowGraph &g) : Obj(x), G(g) {}
+ const T &Obj;
+ const DataFlowGraph &G;
+ };
+
+ template <typename T>
+ struct PrintNode : Print<NodeAddr<T>> {
+ PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g)
+ : Print<NodeAddr<T>>(x, g) {}
+ };
+} // namespace rdf
+
+#endif // RDF_GRAPH_H
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp
new file mode 100644
index 0000000..1d9bd37
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp
@@ -0,0 +1,848 @@
+//===--- RDFLiveness.cpp --------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Computation of the liveness information from the data-flow graph.
+//
+// The main functionality of this code is to compute block live-in
+// information. With the live-in information in place, the placement
+// of kill flags can also be recalculated.
+//
+// The block live-in calculation is based on the ideas from the following
+// publication:
+//
+// Dibyendu Das, Ramakrishna Upadrasta, Benoit Dupont de Dinechin.
+// "Efficient Liveness Computation Using Merge Sets and DJ-Graphs."
+// ACM Transactions on Architecture and Code Optimization, Association for
+// Computing Machinery, 2012, ACM TACO Special Issue on "High-Performance
+// and Embedded Architectures and Compilers", 8 (4),
+// <10.1145/2086696.2086706>. <hal-00647369>
+//
+#include "RDFGraph.h"
+#include "RDFLiveness.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominanceFrontier.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+using namespace rdf;
+
+namespace rdf {
+ template<>
+ raw_ostream &operator<< (raw_ostream &OS, const Print<Liveness::RefMap> &P) {
+ OS << '{';
+ for (auto I : P.Obj) {
+ OS << ' ' << Print<RegisterRef>(I.first, P.G) << '{';
+ for (auto J = I.second.begin(), E = I.second.end(); J != E; ) {
+ OS << Print<NodeId>(*J, P.G);
+ if (++J != E)
+ OS << ',';
+ }
+ OS << '}';
+ }
+ OS << " }";
+ return OS;
+ }
+}
+
+// The order in the returned sequence is the order of reaching defs in the
+// upward traversal: the first def is the closest to the given reference RefA,
+// the next one is further up, and so on.
+// The list ends at a reaching phi def, or when the reference from RefA is
+// covered by the defs in the list (see FullChain).
+// This function provides two modes of operation:
+// (1) Returning the sequence of reaching defs for a particular reference
+// node. This sequence will terminate at the first phi node [1].
+// (2) Returning a partial sequence of reaching defs, where the final goal
+// is to traverse past phi nodes to the actual defs arising from the code
+// itself.
+// In mode (2), the register reference for which the search was started
+// may be different from the reference node RefA, for which this call was
+// made, hence the argument RefRR, which holds the original register.
+// Also, some definitions may have already been encountered in a previous
+// call that will influence register covering. The register references
+// already defined are passed in through DefRRs.
+// In mode (1), the "continuation" considerations do not apply, and the
+// RefRR is the same as the register in RefA, and the set DefRRs is empty.
+//
+// [1] It is possible for multiple phi nodes to be included in the returned
+// sequence:
+// SubA = phi ...
+// SubB = phi ...
+// ... = SuperAB(rdef:SubA), SuperAB"(rdef:SubB)
+// However, these phi nodes are independent from one another in terms of
+// the data-flow.
+
+NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
+ NodeAddr<RefNode*> RefA, bool FullChain, const RegisterSet &DefRRs) {
+ SetVector<NodeId> DefQ;
+ SetVector<NodeId> Owners;
+
+ // The initial queue should not have reaching defs for shadows. The
+ // whole point of a shadow is that it will have a reaching def that
+ // is not aliased to the reaching defs of the related shadows.
+ NodeId Start = RefA.Id;
+ auto SNA = DFG.addr<RefNode*>(Start);
+ if (NodeId RD = SNA.Addr->getReachingDef())
+ DefQ.insert(RD);
+
+ // Collect all the reaching defs, going up until a phi node is encountered,
+ // or there are no more reaching defs. From this set, the actual set of
+ // reaching defs will be selected.
+ // The traversal upwards must go on until a covering def is encountered.
+ // It is possible that a collection of non-covering (individually) defs
+ // will be sufficient, but keep going until a covering one is found.
+ for (unsigned i = 0; i < DefQ.size(); ++i) {
+ auto TA = DFG.addr<DefNode*>(DefQ[i]);
+ if (TA.Addr->getFlags() & NodeAttrs::PhiRef)
+ continue;
+ // Stop at the covering/overwriting def of the initial register reference.
+ RegisterRef RR = TA.Addr->getRegRef();
+ if (RAI.covers(RR, RefRR)) {
+ uint16_t Flags = TA.Addr->getFlags();
+ if (!(Flags & NodeAttrs::Preserving))
+ continue;
+ }
+ // Get the next level of reaching defs. This will include multiple
+ // reaching defs for shadows.
+ for (auto S : DFG.getRelatedRefs(TA.Addr->getOwner(DFG), TA))
+ if (auto RD = NodeAddr<RefNode*>(S).Addr->getReachingDef())
+ DefQ.insert(RD);
+ }
+
+ // Remove all non-phi defs that are not aliased to RefRR, and collect
+ // the owners of the remaining defs.
+ SetVector<NodeId> Defs;
+ for (auto N : DefQ) {
+ auto TA = DFG.addr<DefNode*>(N);
+ bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef;
+ if (!IsPhi && !RAI.alias(RefRR, TA.Addr->getRegRef()))
+ continue;
+ Defs.insert(TA.Id);
+ Owners.insert(TA.Addr->getOwner(DFG).Id);
+ }
+
+ // Return the MachineBasicBlock containing a given instruction.
+ auto Block = [this] (NodeAddr<InstrNode*> IA) -> MachineBasicBlock* {
+ if (IA.Addr->getKind() == NodeAttrs::Stmt)
+ return NodeAddr<StmtNode*>(IA).Addr->getCode()->getParent();
+ assert(IA.Addr->getKind() == NodeAttrs::Phi);
+ NodeAddr<PhiNode*> PA = IA;
+ NodeAddr<BlockNode*> BA = PA.Addr->getOwner(DFG);
+ return BA.Addr->getCode();
+ };
+ // Less(A,B) iff instruction A is further down in the dominator tree than B.
+ auto Less = [&Block,this] (NodeId A, NodeId B) -> bool {
+ if (A == B)
+ return false;
+ auto OA = DFG.addr<InstrNode*>(A), OB = DFG.addr<InstrNode*>(B);
+ MachineBasicBlock *BA = Block(OA), *BB = Block(OB);
+ if (BA != BB)
+ return MDT.dominates(BB, BA);
+ // They are in the same block.
+ bool StmtA = OA.Addr->getKind() == NodeAttrs::Stmt;
+ bool StmtB = OB.Addr->getKind() == NodeAttrs::Stmt;
+ if (StmtA) {
+ if (!StmtB) // OB is a phi and phis dominate statements.
+ return true;
+ auto CA = NodeAddr<StmtNode*>(OA).Addr->getCode();
+ auto CB = NodeAddr<StmtNode*>(OB).Addr->getCode();
+ // The order must be linear, so tie-break such equalities.
+ if (CA == CB)
+ return A < B;
+ return MDT.dominates(CB, CA);
+ } else {
+ // OA is a phi.
+ if (StmtB)
+ return false;
+ // Both are phis. There is no ordering between phis (in terms of
+ // the data-flow), so tie-break this via node id comparison.
+ return A < B;
+ }
+ };
+
+ std::vector<NodeId> Tmp(Owners.begin(), Owners.end());
+ std::sort(Tmp.begin(), Tmp.end(), Less);
+
+ // The vector is a list of instructions, so that defs coming from
+ // the same instruction don't need to be artificially ordered.
+ // Then, when computing the initial segment, and iterating over an
+ // instruction, pick the defs that contribute to the covering (i.e. is
+ // not covered by previously added defs). Check the defs individually,
+ // i.e. first check each def if is covered or not (without adding them
+ // to the tracking set), and then add all the selected ones.
+
+ // The reason for this is this example:
+ // *d1<A>, *d2<B>, ... Assume A and B are aliased (can happen in phi nodes).
+ // *d3<C> If A \incl BuC, and B \incl AuC, then *d2 would be
+ // covered if we added A first, and A would be covered
+ // if we added B first.
+
+ NodeList RDefs;
+ RegisterSet RRs = DefRRs;
+
+ auto DefInSet = [&Defs] (NodeAddr<RefNode*> TA) -> bool {
+ return TA.Addr->getKind() == NodeAttrs::Def &&
+ Defs.count(TA.Id);
+ };
+ for (auto T : Tmp) {
+ if (!FullChain && RAI.covers(RRs, RefRR))
+ break;
+ auto TA = DFG.addr<InstrNode*>(T);
+ bool IsPhi = DFG.IsCode<NodeAttrs::Phi>(TA);
+ NodeList Ds;
+ for (NodeAddr<DefNode*> DA : TA.Addr->members_if(DefInSet, DFG)) {
+ auto QR = DA.Addr->getRegRef();
+ // Add phi defs even if they are covered by subsequent defs. This is
+ // for cases where the reached use is not covered by any of the defs
+ // encountered so far: the phi def is needed to expose the liveness
+ // of that use to the entry of the block.
+ // Example:
+ // phi d1<R3>(,d2,), ... Phi def d1 is covered by d2.
+ // d2<R3>(d1,,u3), ...
+ // ..., u3<D1>(d2) This use needs to be live on entry.
+ if (FullChain || IsPhi || !RAI.covers(RRs, QR))
+ Ds.push_back(DA);
+ }
+ RDefs.insert(RDefs.end(), Ds.begin(), Ds.end());
+ for (NodeAddr<DefNode*> DA : Ds) {
+ // When collecting a full chain of definitions, do not consider phi
+ // defs to actually define a register.
+ uint16_t Flags = DA.Addr->getFlags();
+ if (!FullChain || !(Flags & NodeAttrs::PhiRef))
+ if (!(Flags & NodeAttrs::Preserving))
+ RRs.insert(DA.Addr->getRegRef());
+ }
+ }
+
+ return RDefs;
+}
+
+
+static const RegisterSet NoRegs;
+
+NodeList Liveness::getAllReachingDefs(NodeAddr<RefNode*> RefA) {
+ return getAllReachingDefs(RefA.Addr->getRegRef(), RefA, false, NoRegs);
+}
+
+
+void Liveness::computePhiInfo() {
+ NodeList Phis;
+ NodeAddr<FuncNode*> FA = DFG.getFunc();
+ auto Blocks = FA.Addr->members(DFG);
+ for (NodeAddr<BlockNode*> BA : Blocks) {
+ auto Ps = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG);
+ Phis.insert(Phis.end(), Ps.begin(), Ps.end());
+ }
+
+ // phi use -> (map: reaching phi -> set of registers defined in between)
+ std::map<NodeId,std::map<NodeId,RegisterSet>> PhiUp;
+ std::vector<NodeId> PhiUQ; // Work list of phis for upward propagation.
+
+ // Go over all phis.
+ for (NodeAddr<PhiNode*> PhiA : Phis) {
+ // Go over all defs and collect the reached uses that are non-phi uses
+ // (i.e. the "real uses").
+ auto &RealUses = RealUseMap[PhiA.Id];
+ auto PhiRefs = PhiA.Addr->members(DFG);
+
+ // Have a work queue of defs whose reached uses need to be found.
+ // For each def, add to the queue all reached (non-phi) defs.
+ SetVector<NodeId> DefQ;
+ NodeSet PhiDefs;
+ for (auto R : PhiRefs) {
+ if (!DFG.IsRef<NodeAttrs::Def>(R))
+ continue;
+ DefQ.insert(R.Id);
+ PhiDefs.insert(R.Id);
+ }
+ for (unsigned i = 0; i < DefQ.size(); ++i) {
+ NodeAddr<DefNode*> DA = DFG.addr<DefNode*>(DefQ[i]);
+ NodeId UN = DA.Addr->getReachedUse();
+ while (UN != 0) {
+ NodeAddr<UseNode*> A = DFG.addr<UseNode*>(UN);
+ if (!(A.Addr->getFlags() & NodeAttrs::PhiRef))
+ RealUses[getRestrictedRegRef(A)].insert(A.Id);
+ UN = A.Addr->getSibling();
+ }
+ NodeId DN = DA.Addr->getReachedDef();
+ while (DN != 0) {
+ NodeAddr<DefNode*> A = DFG.addr<DefNode*>(DN);
+ for (auto T : DFG.getRelatedRefs(A.Addr->getOwner(DFG), A)) {
+ uint16_t Flags = NodeAddr<DefNode*>(T).Addr->getFlags();
+ // Must traverse the reached-def chain. Consider:
+ // def(D0) -> def(R0) -> def(R0) -> use(D0)
+ // The reachable use of D0 passes through a def of R0.
+ if (!(Flags & NodeAttrs::PhiRef))
+ DefQ.insert(T.Id);
+ }
+ DN = A.Addr->getSibling();
+ }
+ }
+ // Filter out these uses that appear to be reachable, but really
+ // are not. For example:
+ //
+ // R1:0 = d1
+ // = R1:0 u2 Reached by d1.
+ // R0 = d3
+ // = R1:0 u4 Still reached by d1: indirectly through
+ // the def d3.
+ // R1 = d5
+ // = R1:0 u6 Not reached by d1 (covered collectively
+ // by d3 and d5), but following reached
+ // defs and uses from d1 will lead here.
+ auto HasDef = [&PhiDefs] (NodeAddr<DefNode*> DA) -> bool {
+ return PhiDefs.count(DA.Id);
+ };
+ for (auto UI = RealUses.begin(), UE = RealUses.end(); UI != UE; ) {
+ // For each reached register UI->first, there is a set UI->second, of
+ // uses of it. For each such use, check if it is reached by this phi,
+ // i.e. check if the set of its reaching uses intersects the set of
+ // this phi's defs.
+ auto &Uses = UI->second;
+ for (auto I = Uses.begin(), E = Uses.end(); I != E; ) {
+ auto UA = DFG.addr<UseNode*>(*I);
+ NodeList RDs = getAllReachingDefs(UI->first, UA);
+ if (std::any_of(RDs.begin(), RDs.end(), HasDef))
+ ++I;
+ else
+ I = Uses.erase(I);
+ }
+ if (Uses.empty())
+ UI = RealUses.erase(UI);
+ else
+ ++UI;
+ }
+
+ // If this phi reaches some "real" uses, add it to the queue for upward
+ // propagation.
+ if (!RealUses.empty())
+ PhiUQ.push_back(PhiA.Id);
+
+ // Go over all phi uses and check if the reaching def is another phi.
+ // Collect the phis that are among the reaching defs of these uses.
+ // While traversing the list of reaching defs for each phi use, collect
+ // the set of registers defined between this phi (Phi) and the owner phi
+ // of the reaching def.
+ for (auto I : PhiRefs) {
+ if (!DFG.IsRef<NodeAttrs::Use>(I))
+ continue;
+ NodeAddr<UseNode*> UA = I;
+ auto &UpMap = PhiUp[UA.Id];
+ RegisterSet DefRRs;
+ for (NodeAddr<DefNode*> DA : getAllReachingDefs(UA)) {
+ if (DA.Addr->getFlags() & NodeAttrs::PhiRef)
+ UpMap[DA.Addr->getOwner(DFG).Id] = DefRRs;
+ else
+ DefRRs.insert(DA.Addr->getRegRef());
+ }
+ }
+ }
+
+ if (Trace) {
+ dbgs() << "Phi-up-to-phi map:\n";
+ for (auto I : PhiUp) {
+ dbgs() << "phi " << Print<NodeId>(I.first, DFG) << " -> {";
+ for (auto R : I.second)
+ dbgs() << ' ' << Print<NodeId>(R.first, DFG)
+ << Print<RegisterSet>(R.second, DFG);
+ dbgs() << " }\n";
+ }
+ }
+
+ // Propagate the reached registers up in the phi chain.
+ //
+ // The following type of situation needs careful handling:
+ //
+ // phi d1<R1:0> (1)
+ // |
+ // ... d2<R1>
+ // |
+ // phi u3<R1:0> (2)
+ // |
+ // ... u4<R1>
+ //
+ // The phi node (2) defines a register pair R1:0, and reaches a "real"
+ // use u4 of just R1. The same phi node is also known to reach (upwards)
+ // the phi node (1). However, the use u4 is not reached by phi (1),
+ // because of the intervening definition d2 of R1. The data flow between
+ // phis (1) and (2) is restricted to R1:0 minus R1, i.e. R0.
+ //
+ // When propagating uses up the phi chains, get the all reaching defs
+ // for a given phi use, and traverse the list until the propagated ref
+ // is covered, or until or until reaching the final phi. Only assume
+ // that the reference reaches the phi in the latter case.
+
+ for (unsigned i = 0; i < PhiUQ.size(); ++i) {
+ auto PA = DFG.addr<PhiNode*>(PhiUQ[i]);
+ auto &RealUses = RealUseMap[PA.Id];
+ for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
+ NodeAddr<UseNode*> UA = U;
+ auto &UpPhis = PhiUp[UA.Id];
+ for (auto UP : UpPhis) {
+ bool Changed = false;
+ auto &MidDefs = UP.second;
+ // Collect the set UpReached of uses that are reached by the current
+ // phi PA, and are not covered by any intervening def between PA and
+ // the upward phi UP.
+ RegisterSet UpReached;
+ for (auto T : RealUses) {
+ if (!isRestricted(PA, UA, T.first))
+ continue;
+ if (!RAI.covers(MidDefs, T.first))
+ UpReached.insert(T.first);
+ }
+ if (UpReached.empty())
+ continue;
+ // Update the set PRUs of real uses reached by the upward phi UP with
+ // the actual set of uses (UpReached) that the UP phi reaches.
+ auto &PRUs = RealUseMap[UP.first];
+ for (auto R : UpReached) {
+ unsigned Z = PRUs[R].size();
+ PRUs[R].insert(RealUses[R].begin(), RealUses[R].end());
+ Changed |= (PRUs[R].size() != Z);
+ }
+ if (Changed)
+ PhiUQ.push_back(UP.first);
+ }
+ }
+ }
+
+ if (Trace) {
+ dbgs() << "Real use map:\n";
+ for (auto I : RealUseMap) {
+ dbgs() << "phi " << Print<NodeId>(I.first, DFG);
+ NodeAddr<PhiNode*> PA = DFG.addr<PhiNode*>(I.first);
+ NodeList Ds = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Def>, DFG);
+ if (!Ds.empty()) {
+ RegisterRef RR = NodeAddr<DefNode*>(Ds[0]).Addr->getRegRef();
+ dbgs() << '<' << Print<RegisterRef>(RR, DFG) << '>';
+ } else {
+ dbgs() << "<noreg>";
+ }
+ dbgs() << " -> " << Print<RefMap>(I.second, DFG) << '\n';
+ }
+ }
+}
+
+
+void Liveness::computeLiveIns() {
+ // Populate the node-to-block map. This speeds up the calculations
+ // significantly.
+ NBMap.clear();
+ for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) {
+ MachineBasicBlock *BB = BA.Addr->getCode();
+ for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) {
+ for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG))
+ NBMap.insert(std::make_pair(RA.Id, BB));
+ NBMap.insert(std::make_pair(IA.Id, BB));
+ }
+ }
+
+ MachineFunction &MF = DFG.getMF();
+
+ // Compute IDF first, then the inverse.
+ decltype(IIDF) IDF;
+ for (auto &B : MF) {
+ auto F1 = MDF.find(&B);
+ if (F1 == MDF.end())
+ continue;
+ SetVector<MachineBasicBlock*> IDFB(F1->second.begin(), F1->second.end());
+ for (unsigned i = 0; i < IDFB.size(); ++i) {
+ auto F2 = MDF.find(IDFB[i]);
+ if (F2 != MDF.end())
+ IDFB.insert(F2->second.begin(), F2->second.end());
+ }
+ // Add B to the IDF(B). This will put B in the IIDF(B).
+ IDFB.insert(&B);
+ IDF[&B].insert(IDFB.begin(), IDFB.end());
+ }
+
+ for (auto I : IDF)
+ for (auto S : I.second)
+ IIDF[S].insert(I.first);
+
+ computePhiInfo();
+
+ NodeAddr<FuncNode*> FA = DFG.getFunc();
+ auto Blocks = FA.Addr->members(DFG);
+
+ // Build the phi live-on-entry map.
+ for (NodeAddr<BlockNode*> BA : Blocks) {
+ MachineBasicBlock *MB = BA.Addr->getCode();
+ auto &LON = PhiLON[MB];
+ for (auto P : BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG))
+ for (auto S : RealUseMap[P.Id])
+ LON[S.first].insert(S.second.begin(), S.second.end());
+ }
+
+ if (Trace) {
+ dbgs() << "Phi live-on-entry map:\n";
+ for (auto I : PhiLON)
+ dbgs() << "block #" << I.first->getNumber() << " -> "
+ << Print<RefMap>(I.second, DFG) << '\n';
+ }
+
+ // Build the phi live-on-exit map. Each phi node has some set of reached
+ // "real" uses. Propagate this set backwards into the block predecessors
+ // through the reaching defs of the corresponding phi uses.
+ for (NodeAddr<BlockNode*> BA : Blocks) {
+ auto Phis = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG);
+ for (NodeAddr<PhiNode*> PA : Phis) {
+ auto &RUs = RealUseMap[PA.Id];
+ if (RUs.empty())
+ continue;
+
+ for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
+ NodeAddr<PhiUseNode*> UA = U;
+ if (UA.Addr->getReachingDef() == 0)
+ continue;
+
+ // Mark all reached "real" uses of P as live on exit in the
+ // predecessor.
+ // Remap all the RUs so that they have a correct reaching def.
+ auto PrA = DFG.addr<BlockNode*>(UA.Addr->getPredecessor());
+ auto &LOX = PhiLOX[PrA.Addr->getCode()];
+ for (auto R : RUs) {
+ RegisterRef RR = R.first;
+ if (!isRestricted(PA, UA, RR))
+ RR = getRestrictedRegRef(UA);
+ // The restricted ref may be different from the ref that was
+ // accessed in the "real use". This means that this phi use
+ // is not the one that carries this reference, so skip it.
+ if (!RAI.alias(R.first, RR))
+ continue;
+ for (auto D : getAllReachingDefs(RR, UA))
+ LOX[RR].insert(D.Id);
+ }
+ } // for U : phi uses
+ } // for P : Phis
+ } // for B : Blocks
+
+ if (Trace) {
+ dbgs() << "Phi live-on-exit map:\n";
+ for (auto I : PhiLOX)
+ dbgs() << "block #" << I.first->getNumber() << " -> "
+ << Print<RefMap>(I.second, DFG) << '\n';
+ }
+
+ RefMap LiveIn;
+ traverse(&MF.front(), LiveIn);
+
+ // Add function live-ins to the live-in set of the function entry block.
+ auto &EntryIn = LiveMap[&MF.front()];
+ for (auto I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I)
+ EntryIn.insert({I->first,0});
+
+ if (Trace) {
+ // Dump the liveness map
+ for (auto &B : MF) {
+ BitVector LV(TRI.getNumRegs());
+ for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I)
+ LV.set(I->PhysReg);
+ dbgs() << "BB#" << B.getNumber() << "\t rec = {";
+ for (int x = LV.find_first(); x >= 0; x = LV.find_next(x))
+ dbgs() << ' ' << Print<RegisterRef>({unsigned(x),0}, DFG);
+ dbgs() << " }\n";
+ dbgs() << "\tcomp = " << Print<RegisterSet>(LiveMap[&B], DFG) << '\n';
+ }
+ }
+}
+
+
+void Liveness::resetLiveIns() {
+ for (auto &B : DFG.getMF()) {
+ // Remove all live-ins.
+ std::vector<unsigned> T;
+ for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I)
+ T.push_back(I->PhysReg);
+ for (auto I : T)
+ B.removeLiveIn(I);
+ // Add the newly computed live-ins.
+ auto &LiveIns = LiveMap[&B];
+ for (auto I : LiveIns) {
+ assert(I.Sub == 0);
+ B.addLiveIn(I.Reg);
+ }
+ }
+}
+
+
+void Liveness::resetKills() {
+ for (auto &B : DFG.getMF())
+ resetKills(&B);
+}
+
+
+void Liveness::resetKills(MachineBasicBlock *B) {
+ auto CopyLiveIns = [] (MachineBasicBlock *B, BitVector &LV) -> void {
+ for (auto I = B->livein_begin(), E = B->livein_end(); I != E; ++I)
+ LV.set(I->PhysReg);
+ };
+
+ BitVector LiveIn(TRI.getNumRegs()), Live(TRI.getNumRegs());
+ CopyLiveIns(B, LiveIn);
+ for (auto SI : B->successors())
+ CopyLiveIns(SI, Live);
+
+ for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ if (MI->isDebugValue())
+ continue;
+
+ MI->clearKillInfo();
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(R))
+ continue;
+ for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR)
+ Live.reset(*SR);
+ }
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(R))
+ continue;
+ bool IsLive = false;
+ for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR) {
+ if (!Live[*SR])
+ continue;
+ IsLive = true;
+ break;
+ }
+ if (IsLive)
+ continue;
+ Op.setIsKill(true);
+ for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR)
+ Live.set(*SR);
+ }
+ }
+}
+
+
+// For shadows, determine if RR is aliased to a reaching def of any other
+// shadow associated with RA. If it is not, then RR is "restricted" to RA,
+// and so it can be considered a value specific to RA. This is important
+// for accurately determining values associated with phi uses.
+// For non-shadows, this function returns "true".
+bool Liveness::isRestricted(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
+ RegisterRef RR) const {
+ NodeId Start = RA.Id;
+ for (NodeAddr<RefNode*> TA = DFG.getNextShadow(IA, RA);
+ TA.Id != 0 && TA.Id != Start; TA = DFG.getNextShadow(IA, TA)) {
+ NodeId RD = TA.Addr->getReachingDef();
+ if (RD == 0)
+ continue;
+ if (RAI.alias(RR, DFG.addr<DefNode*>(RD).Addr->getRegRef()))
+ return false;
+ }
+ return true;
+}
+
+
+RegisterRef Liveness::getRestrictedRegRef(NodeAddr<RefNode*> RA) const {
+ assert(DFG.IsRef<NodeAttrs::Use>(RA));
+ if (RA.Addr->getFlags() & NodeAttrs::Shadow) {
+ NodeId RD = RA.Addr->getReachingDef();
+ assert(RD);
+ RA = DFG.addr<DefNode*>(RD);
+ }
+ return RA.Addr->getRegRef();
+}
+
+
+unsigned Liveness::getPhysReg(RegisterRef RR) const {
+ if (!TargetRegisterInfo::isPhysicalRegister(RR.Reg))
+ return 0;
+ return RR.Sub ? TRI.getSubReg(RR.Reg, RR.Sub) : RR.Reg;
+}
+
+
+// Helper function to obtain the basic block containing the reaching def
+// of the given use.
+MachineBasicBlock *Liveness::getBlockWithRef(NodeId RN) const {
+ auto F = NBMap.find(RN);
+ if (F != NBMap.end())
+ return F->second;
+ llvm_unreachable("Node id not in map");
+}
+
+
+void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
+ // The LiveIn map, for each (physical) register, contains the set of live
+ // reaching defs of that register that are live on entry to the associated
+ // block.
+
+ // The summary of the traversal algorithm:
+ //
+ // R is live-in in B, if there exists a U(R), such that rdef(R) dom B
+ // and (U \in IDF(B) or B dom U).
+ //
+ // for (C : children) {
+ // LU = {}
+ // traverse(C, LU)
+ // LiveUses += LU
+ // }
+ //
+ // LiveUses -= Defs(B);
+ // LiveUses += UpwardExposedUses(B);
+ // for (C : IIDF[B])
+ // for (U : LiveUses)
+ // if (Rdef(U) dom C)
+ // C.addLiveIn(U)
+ //
+
+ // Go up the dominator tree (depth-first).
+ MachineDomTreeNode *N = MDT.getNode(B);
+ for (auto I : *N) {
+ RefMap L;
+ MachineBasicBlock *SB = I->getBlock();
+ traverse(SB, L);
+
+ for (auto S : L)
+ LiveIn[S.first].insert(S.second.begin(), S.second.end());
+ }
+
+ if (Trace) {
+ dbgs() << LLVM_FUNCTION_NAME << " in BB#" << B->getNumber()
+ << " after recursion into";
+ for (auto I : *N)
+ dbgs() << ' ' << I->getBlock()->getNumber();
+ dbgs() << "\n LiveIn: " << Print<RefMap>(LiveIn, DFG);
+ dbgs() << "\n Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n';
+ }
+
+ // Add phi uses that are live on exit from this block.
+ RefMap &PUs = PhiLOX[B];
+ for (auto S : PUs)
+ LiveIn[S.first].insert(S.second.begin(), S.second.end());
+
+ if (Trace) {
+ dbgs() << "after LOX\n";
+ dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
+ dbgs() << " Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n';
+ }
+
+ // Stop tracking all uses defined in this block: erase those records
+ // where the reaching def is located in B and which cover all reached
+ // uses.
+ auto Copy = LiveIn;
+ LiveIn.clear();
+
+ for (auto I : Copy) {
+ auto &Defs = LiveIn[I.first];
+ NodeSet Rest;
+ for (auto R : I.second) {
+ auto DA = DFG.addr<DefNode*>(R);
+ RegisterRef DDR = DA.Addr->getRegRef();
+ NodeAddr<InstrNode*> IA = DA.Addr->getOwner(DFG);
+ NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
+ // Defs from a different block need to be preserved. Defs from this
+ // block will need to be processed further, except for phi defs, the
+ // liveness of which is handled through the PhiLON/PhiLOX maps.
+ if (B != BA.Addr->getCode())
+ Defs.insert(R);
+ else {
+ bool IsPreserving = DA.Addr->getFlags() & NodeAttrs::Preserving;
+ if (IA.Addr->getKind() != NodeAttrs::Phi && !IsPreserving) {
+ bool Covering = RAI.covers(DDR, I.first);
+ NodeId U = DA.Addr->getReachedUse();
+ while (U && Covering) {
+ auto DUA = DFG.addr<UseNode*>(U);
+ RegisterRef Q = DUA.Addr->getRegRef();
+ Covering = RAI.covers(DA.Addr->getRegRef(), Q);
+ U = DUA.Addr->getSibling();
+ }
+ if (!Covering)
+ Rest.insert(R);
+ }
+ }
+ }
+
+ // Non-covering defs from B.
+ for (auto R : Rest) {
+ auto DA = DFG.addr<DefNode*>(R);
+ RegisterRef DRR = DA.Addr->getRegRef();
+ RegisterSet RRs;
+ for (NodeAddr<DefNode*> TA : getAllReachingDefs(DA)) {
+ NodeAddr<InstrNode*> IA = TA.Addr->getOwner(DFG);
+ NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
+ // Preserving defs do not count towards covering.
+ if (!(TA.Addr->getFlags() & NodeAttrs::Preserving))
+ RRs.insert(TA.Addr->getRegRef());
+ if (BA.Addr->getCode() == B)
+ continue;
+ if (RAI.covers(RRs, DRR))
+ break;
+ Defs.insert(TA.Id);
+ }
+ }
+ }
+
+ emptify(LiveIn);
+
+ if (Trace) {
+ dbgs() << "after defs in block\n";
+ dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
+ dbgs() << " Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n';
+ }
+
+ // Scan the block for upward-exposed uses and add them to the tracking set.
+ for (auto I : DFG.getFunc().Addr->findBlock(B, DFG).Addr->members(DFG)) {
+ NodeAddr<InstrNode*> IA = I;
+ if (IA.Addr->getKind() != NodeAttrs::Stmt)
+ continue;
+ for (NodeAddr<UseNode*> UA : IA.Addr->members_if(DFG.IsUse, DFG)) {
+ RegisterRef RR = UA.Addr->getRegRef();
+ for (auto D : getAllReachingDefs(UA))
+ if (getBlockWithRef(D.Id) != B)
+ LiveIn[RR].insert(D.Id);
+ }
+ }
+
+ if (Trace) {
+ dbgs() << "after uses in block\n";
+ dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
+ dbgs() << " Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n';
+ }
+
+ // Phi uses should not be propagated up the dominator tree, since they
+ // are not dominated by their corresponding reaching defs.
+ auto &Local = LiveMap[B];
+ auto &LON = PhiLON[B];
+ for (auto R : LON)
+ Local.insert(R.first);
+
+ if (Trace) {
+ dbgs() << "after phi uses in block\n";
+ dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
+ dbgs() << " Local: " << Print<RegisterSet>(Local, DFG) << '\n';
+ }
+
+ for (auto C : IIDF[B]) {
+ auto &LiveC = LiveMap[C];
+ for (auto S : LiveIn)
+ for (auto R : S.second)
+ if (MDT.properlyDominates(getBlockWithRef(R), C))
+ LiveC.insert(S.first);
+ }
+}
+
+
+void Liveness::emptify(RefMap &M) {
+ for (auto I = M.begin(), E = M.end(); I != E; )
+ I = I->second.empty() ? M.erase(I) : std::next(I);
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.h b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.h
new file mode 100644
index 0000000..4c1e8f3
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.h
@@ -0,0 +1,106 @@
+//===--- RDFLiveness.h ----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Recalculate the liveness information given a data flow graph.
+// This includes block live-ins and kill flags.
+
+#ifndef RDF_LIVENESS_H
+#define RDF_LIVENESS_H
+
+#include "RDFGraph.h"
+#include "llvm/ADT/DenseMap.h"
+#include <map>
+
+using namespace llvm;
+
+namespace llvm {
+ class MachineBasicBlock;
+ class MachineFunction;
+ class MachineRegisterInfo;
+ class TargetRegisterInfo;
+ class MachineDominatorTree;
+ class MachineDominanceFrontier;
+}
+
+namespace rdf {
+ struct Liveness {
+ public:
+ typedef std::map<MachineBasicBlock*,RegisterSet> LiveMapType;
+ typedef std::map<RegisterRef,NodeSet> RefMap;
+
+ Liveness(MachineRegisterInfo &mri, const DataFlowGraph &g)
+ : DFG(g), TRI(g.getTRI()), MDT(g.getDT()), MDF(g.getDF()),
+ RAI(g.getRAI()), MRI(mri), Empty(), Trace(false) {}
+
+ NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
+ bool FullChain = false, const RegisterSet &DefRRs = RegisterSet());
+ NodeList getAllReachingDefs(NodeAddr<RefNode*> RefA);
+
+ LiveMapType &getLiveMap() { return LiveMap; }
+ const LiveMapType &getLiveMap() const { return LiveMap; }
+ const RefMap &getRealUses(NodeId P) const {
+ auto F = RealUseMap.find(P);
+ return F == RealUseMap.end() ? Empty : F->second;
+ }
+
+ void computePhiInfo();
+ void computeLiveIns();
+ void resetLiveIns();
+ void resetKills();
+ void resetKills(MachineBasicBlock *B);
+
+ void trace(bool T) { Trace = T; }
+
+ private:
+ const DataFlowGraph &DFG;
+ const TargetRegisterInfo &TRI;
+ const MachineDominatorTree &MDT;
+ const MachineDominanceFrontier &MDF;
+ const RegisterAliasInfo &RAI;
+ MachineRegisterInfo &MRI;
+ LiveMapType LiveMap;
+ const RefMap Empty;
+ bool Trace;
+
+ // Cache of mapping from node ids (for RefNodes) to the containing
+ // basic blocks. Not computing it each time for each node reduces
+ // the liveness calculation time by a large fraction.
+ typedef DenseMap<NodeId,MachineBasicBlock*> NodeBlockMap;
+ NodeBlockMap NBMap;
+
+ // Phi information:
+ //
+ // map: NodeId -> (map: RegisterRef -> NodeSet)
+ // phi id -> (map: register -> set of reached non-phi uses)
+ std::map<NodeId, RefMap> RealUseMap;
+
+ // Inverse iterated dominance frontier.
+ std::map<MachineBasicBlock*,std::set<MachineBasicBlock*>> IIDF;
+
+ // Live on entry.
+ std::map<MachineBasicBlock*,RefMap> PhiLON;
+
+ // Phi uses are considered to be located at the end of the block that
+ // they are associated with. The reaching def of a phi use dominates the
+ // block that the use corresponds to, but not the block that contains
+ // the phi itself. To include these uses in the liveness propagation (up
+ // the dominator tree), create a map: block -> set of uses live on exit.
+ std::map<MachineBasicBlock*,RefMap> PhiLOX;
+
+ bool isRestricted(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
+ RegisterRef RR) const;
+ RegisterRef getRestrictedRegRef(NodeAddr<RefNode*> RA) const;
+ unsigned getPhysReg(RegisterRef RR) const;
+ MachineBasicBlock *getBlockWithRef(NodeId RN) const;
+ void traverse(MachineBasicBlock *B, RefMap &LiveIn);
+ void emptify(RefMap &M);
+ };
+}
+
+#endif // RDF_LIVENESS_H
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 6756c17..5680130 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -277,8 +277,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
@@ -327,6 +325,8 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::BR_CC, MVT::i64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
@@ -872,7 +872,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(Op, DAG);
case ISD::JumpTable: return lowerJumpTable(Op, DAG);
case ISD::SELECT: return lowerSELECT(Op, DAG);
- case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG);
case ISD::SETCC: return lowerSETCC(Op, DAG);
case ISD::VASTART: return lowerVASTART(Op, DAG);
case ISD::VAARG: return lowerVAARG(Op, DAG);
@@ -1648,20 +1647,6 @@ lowerSELECT(SDValue Op, SelectionDAG &DAG) const
SDLoc(Op));
}
-SDValue MipsTargetLowering::
-lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
-{
- SDLoc DL(Op);
- EVT Ty = Op.getOperand(0).getValueType();
- SDValue Cond =
- DAG.getNode(ISD::SETCC, DL, getSetCCResultType(DAG.getDataLayout(),
- *DAG.getContext(), Ty),
- Op.getOperand(0), Op.getOperand(1), Op.getOperand(4));
-
- return DAG.getNode(ISD::SELECT, DL, Op.getValueType(), Cond, Op.getOperand(2),
- Op.getOperand(3));
-}
-
SDValue MipsTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const {
assert(!Subtarget.hasMips32r6() && !Subtarget.hasMips64r6());
SDValue Cond = createFPCmp(DAG, Op);
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
index b33e125..0dc683e 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -430,7 +430,6 @@ namespace llvm {
SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
index d9fb8c8..ffda491 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -1003,7 +1003,7 @@ class IndirectBranch<string opstr, RegisterOperand RO> : JumpFR<opstr, RO> {
let isCall=1, hasDelaySlot=1, Defs = [RA] in {
class JumpLink<string opstr, DAGOperand opnd> :
InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"),
- [(MipsJmpLink imm:$target)], II_JAL, FrmJ, opstr> {
+ [(MipsJmpLink tglobaladdr:$target)], II_JAL, FrmJ, opstr> {
let DecoderMethod = "DecodeJumpTarget";
}
@@ -2075,8 +2075,6 @@ def : MipsPat<(MipsSync (i32 immz)),
(SYNC 0)>, ISA_MIPS2;
// Call
-def : MipsPat<(MipsJmpLink (i32 tglobaladdr:$dst)),
- (JAL tglobaladdr:$dst)>;
def : MipsPat<(MipsJmpLink (i32 texternalsym:$dst)),
(JAL texternalsym:$dst)>;
//def : MipsPat<(MipsJmpLink GPR32:$dst),
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
index e6f7fe9..d4aeaf9 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -544,8 +544,6 @@ void MipsSEInstrInfo::expandPseudoMTLoHi(MachineBasicBlock &MBB,
const MachineOperand &SrcLo = I->getOperand(1), &SrcHi = I->getOperand(2);
MachineInstrBuilder LoInst = BuildMI(MBB, I, DL, get(LoOpc));
MachineInstrBuilder HiInst = BuildMI(MBB, I, DL, get(HiOpc));
- LoInst.addReg(SrcLo.getReg(), getKillRegState(SrcLo.isKill()));
- HiInst.addReg(SrcHi.getReg(), getKillRegState(SrcHi.isKill()));
// Add lo/hi registers if the mtlo/hi instructions created have explicit
// def registers.
@@ -556,6 +554,9 @@ void MipsSEInstrInfo::expandPseudoMTLoHi(MachineBasicBlock &MBB,
LoInst.addReg(DstLo, RegState::Define);
HiInst.addReg(DstHi, RegState::Define);
}
+
+ LoInst.addReg(SrcLo.getReg(), getKillRegState(SrcLo.isKill()));
+ HiInst.addReg(SrcHi.getReg(), getKillRegState(SrcHi.isKill()));
}
void MipsSEInstrInfo::expandCvtFPInt(MachineBasicBlock &MBB,
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 7663696..be735f6 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -4549,6 +4549,7 @@ NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
delete static_cast<NVPTXSection *>(DwarfLocSection);
delete static_cast<NVPTXSection *>(DwarfARangesSection);
delete static_cast<NVPTXSection *>(DwarfRangesSection);
+ delete static_cast<NVPTXSection *>(DwarfMacinfoSection);
}
MCSection *
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index 0f88ddf..683b9a3 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -41,6 +41,7 @@ public:
DwarfLocSection = nullptr;
DwarfARangesSection = nullptr;
DwarfRangesSection = nullptr;
+ DwarfMacinfoSection = nullptr;
}
virtual ~NVPTXTargetObjectFile();
@@ -81,6 +82,8 @@ public:
new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
DwarfRangesSection =
new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfMacinfoSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
}
MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 9a63c14..ec354c2 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1092,8 +1092,28 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
}
// ELFv2 ABI - Normal entry label.
- if (Subtarget->isELFv2ABI())
+ if (Subtarget->isELFv2ABI()) {
+ // In the Large code model, we allow arbitrary displacements between
+ // the text section and its associated TOC section. We place the
+ // full 8-byte offset to the TOC in memory immediatedly preceding
+ // the function global entry point.
+ if (TM.getCodeModel() == CodeModel::Large
+ && !MF->getRegInfo().use_empty(PPC::X2)) {
+ const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
+
+ MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC."));
+ MCSymbol *GlobalEPSymbol = PPCFI->getGlobalEPSymbol();
+ const MCExpr *TOCDeltaExpr =
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext),
+ MCSymbolRefExpr::create(GlobalEPSymbol,
+ OutContext),
+ OutContext);
+
+ OutStreamer->EmitLabel(PPCFI->getTOCOffsetSymbol());
+ OutStreamer->EmitValue(TOCDeltaExpr, 8);
+ }
return AsmPrinter::EmitFunctionEntryLabel();
+ }
// Emit an official procedure descriptor.
MCSectionSubPair Current = OutStreamer->getCurrentSection();
@@ -1160,10 +1180,25 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
// thus emit a prefix sequence along the following lines:
//
// func:
+ // .Lfunc_gepNN:
+ // # global entry point
+ // addis r2,r12,(.TOC.-.Lfunc_gepNN)@ha
+ // addi r2,r2,(.TOC.-.Lfunc_gepNN)@l
+ // .Lfunc_lepNN:
+ // .localentry func, .Lfunc_lepNN-.Lfunc_gepNN
+ // # local entry point, followed by function body
+ //
+ // For the Large code model, we create
+ //
+ // .Lfunc_tocNN:
+ // .quad .TOC.-.Lfunc_gepNN # done by EmitFunctionEntryLabel
+ // func:
+ // .Lfunc_gepNN:
// # global entry point
- // addis r2,r12,(.TOC.-func)@ha
- // addi r2,r2,(.TOC.-func)@l
- // .localentry func, .-func
+ // ld r2,.Lfunc_tocNN-.Lfunc_gepNN(r12)
+ // add r2,r2,r12
+ // .Lfunc_lepNN:
+ // .localentry func, .Lfunc_lepNN-.Lfunc_gepNN
// # local entry point, followed by function body
//
// This ensures we have r2 set up correctly while executing the function
@@ -1171,32 +1206,49 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
if (Subtarget->isELFv2ABI()
// Only do all that if the function uses r2 in the first place.
&& !MF->getRegInfo().use_empty(PPC::X2)) {
+ const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
- MCSymbol *GlobalEntryLabel = OutContext.createTempSymbol();
+ MCSymbol *GlobalEntryLabel = PPCFI->getGlobalEPSymbol();
OutStreamer->EmitLabel(GlobalEntryLabel);
const MCSymbolRefExpr *GlobalEntryLabelExp =
MCSymbolRefExpr::create(GlobalEntryLabel, OutContext);
- MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC."));
- const MCExpr *TOCDeltaExpr =
- MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext),
- GlobalEntryLabelExp, OutContext);
+ if (TM.getCodeModel() != CodeModel::Large) {
+ MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC."));
+ const MCExpr *TOCDeltaExpr =
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext),
+ GlobalEntryLabelExp, OutContext);
- const MCExpr *TOCDeltaHi =
- PPCMCExpr::createHa(TOCDeltaExpr, false, OutContext);
- EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
- .addReg(PPC::X2)
- .addReg(PPC::X12)
- .addExpr(TOCDeltaHi));
-
- const MCExpr *TOCDeltaLo =
- PPCMCExpr::createLo(TOCDeltaExpr, false, OutContext);
- EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI)
- .addReg(PPC::X2)
- .addReg(PPC::X2)
- .addExpr(TOCDeltaLo));
-
- MCSymbol *LocalEntryLabel = OutContext.createTempSymbol();
+ const MCExpr *TOCDeltaHi =
+ PPCMCExpr::createHa(TOCDeltaExpr, false, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
+ .addReg(PPC::X2)
+ .addReg(PPC::X12)
+ .addExpr(TOCDeltaHi));
+
+ const MCExpr *TOCDeltaLo =
+ PPCMCExpr::createLo(TOCDeltaExpr, false, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI)
+ .addReg(PPC::X2)
+ .addReg(PPC::X2)
+ .addExpr(TOCDeltaLo));
+ } else {
+ MCSymbol *TOCOffset = PPCFI->getTOCOffsetSymbol();
+ const MCExpr *TOCOffsetDeltaExpr =
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCOffset, OutContext),
+ GlobalEntryLabelExp, OutContext);
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD)
+ .addReg(PPC::X2)
+ .addExpr(TOCOffsetDeltaExpr)
+ .addReg(PPC::X12));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADD8)
+ .addReg(PPC::X2)
+ .addReg(PPC::X2)
+ .addReg(PPC::X12));
+ }
+
+ MCSymbol *LocalEntryLabel = PPCFI->getLocalEPSymbol();
OutStreamer->EmitLabel(LocalEntryLabel);
const MCSymbolRefExpr *LocalEntryLabelExp =
MCSymbolRefExpr::create(LocalEntryLabel, OutContext);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 075e093..79e4fe3 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -299,22 +299,35 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
// 64-bit CR instructions
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let hasSideEffects = 0 in {
+// mtocrf's input needs to be prepared by shifting by an amount dependent
+// on the cr register selected. Thus, post-ra anti-dep breaking must not
+// later change that register assignment.
+let hasExtraDefRegAllocReq = 1 in {
def MTOCRF8: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins g8rc:$ST),
"mtocrf $FXM, $ST", IIC_BrMCRX>,
PPC970_DGroup_First, PPC970_Unit_CRU;
+// Similarly to mtocrf, the mask for mtcrf must be prepared in a way that
+// is dependent on the cr fields being set.
def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$rS),
"mtcrf $FXM, $rS", IIC_BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
+} // hasExtraDefRegAllocReq = 1
-let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking.
+// mfocrf's input needs to be prepared by shifting by an amount dependent
+// on the cr register selected. Thus, post-ra anti-dep breaking must not
+// later change that register assignment.
+let hasExtraSrcRegAllocReq = 1 in {
def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM),
"mfocrf $rT, $FXM", IIC_SprMFCRF>,
PPC970_DGroup_First, PPC970_Unit_CRU;
+// Similarly to mfocrf, the mask for mfcrf must be prepared in a way that
+// is dependent on the cr fields being copied.
def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins),
"mfcr $rT", IIC_SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
+} // hasExtraSrcRegAllocReq = 1
} // hasSideEffects = 0
let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index c17603a..dcff6ad 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -744,20 +744,43 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
"isel is for regular integer GPRs only");
unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL;
- unsigned SelectPred = Cond[0].getImm();
+ auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm());
unsigned SubIdx;
bool SwapOps;
switch (SelectPred) {
- default: llvm_unreachable("invalid predicate for isel");
- case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break;
- case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break;
- case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break;
- case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break;
- case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break;
- case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break;
- case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break;
- case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break;
+ case PPC::PRED_EQ:
+ case PPC::PRED_EQ_MINUS:
+ case PPC::PRED_EQ_PLUS:
+ SubIdx = PPC::sub_eq; SwapOps = false; break;
+ case PPC::PRED_NE:
+ case PPC::PRED_NE_MINUS:
+ case PPC::PRED_NE_PLUS:
+ SubIdx = PPC::sub_eq; SwapOps = true; break;
+ case PPC::PRED_LT:
+ case PPC::PRED_LT_MINUS:
+ case PPC::PRED_LT_PLUS:
+ SubIdx = PPC::sub_lt; SwapOps = false; break;
+ case PPC::PRED_GE:
+ case PPC::PRED_GE_MINUS:
+ case PPC::PRED_GE_PLUS:
+ SubIdx = PPC::sub_lt; SwapOps = true; break;
+ case PPC::PRED_GT:
+ case PPC::PRED_GT_MINUS:
+ case PPC::PRED_GT_PLUS:
+ SubIdx = PPC::sub_gt; SwapOps = false; break;
+ case PPC::PRED_LE:
+ case PPC::PRED_LE_MINUS:
+ case PPC::PRED_LE_PLUS:
+ SubIdx = PPC::sub_gt; SwapOps = true; break;
+ case PPC::PRED_UN:
+ case PPC::PRED_UN_MINUS:
+ case PPC::PRED_UN_PLUS:
+ SubIdx = PPC::sub_un; SwapOps = false; break;
+ case PPC::PRED_NU:
+ case PPC::PRED_NU_MINUS:
+ case PPC::PRED_NU_PLUS:
+ SubIdx = PPC::sub_un; SwapOps = true; break;
case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break;
case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 6c4364a..ce0f9e6 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2299,22 +2299,35 @@ def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
"#RESTORE_VRSAVE", []>;
let hasSideEffects = 0 in {
+// mtocrf's input needs to be prepared by shifting by an amount dependent
+// on the cr register selected. Thus, post-ra anti-dep breaking must not
+// later change that register assignment.
+let hasExtraDefRegAllocReq = 1 in {
def MTOCRF: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins gprc:$ST),
"mtocrf $FXM, $ST", IIC_BrMCRX>,
PPC970_DGroup_First, PPC970_Unit_CRU;
+// Similarly to mtocrf, the mask for mtcrf must be prepared in a way that
+// is dependent on the cr fields being set.
def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS),
"mtcrf $FXM, $rS", IIC_BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
+} // hasExtraDefRegAllocReq = 1
-let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking.
+// mfocrf's input needs to be prepared by shifting by an amount dependent
+// on the cr register selected. Thus, post-ra anti-dep breaking must not
+// later change that register assignment.
+let hasExtraSrcRegAllocReq = 1 in {
def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM),
"mfocrf $rT, $FXM", IIC_SprMFCRF>,
PPC970_DGroup_First, PPC970_Unit_CRU;
+// Similarly to mfocrf, the mask for mfcrf must be prepared in a way that
+// is dependent on the cr fields being copied.
def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins),
"mfcr $rT", IIC_SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
+} // hasExtraSrcRegAllocReq = 1
} // hasSideEffects = 0
// Pseudo instruction to perform FADD in round-to-zero mode.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index 95f1631..9d91e31 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -23,3 +23,24 @@ MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const {
Twine(MF.getFunctionNumber()) +
"$poff");
}
+
+MCSymbol *PPCFunctionInfo::getGlobalEPSymbol() const {
+ const DataLayout &DL = MF.getDataLayout();
+ return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
+ "func_gep" +
+ Twine(MF.getFunctionNumber()));
+}
+
+MCSymbol *PPCFunctionInfo::getLocalEPSymbol() const {
+ const DataLayout &DL = MF.getDataLayout();
+ return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
+ "func_lep" +
+ Twine(MF.getFunctionNumber()));
+}
+
+MCSymbol *PPCFunctionInfo::getTOCOffsetSymbol() const {
+ const DataLayout &DL = MF.getDataLayout();
+ return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
+ "func_toc" +
+ Twine(MF.getFunctionNumber()));
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 607cdf6..10a8ce0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -197,6 +197,10 @@ public:
bool usesPICBase() const { return UsesPICBase; }
MCSymbol *getPICOffsetSymbol() const;
+
+ MCSymbol *getGlobalEPSymbol() const;
+ MCSymbol *getLocalEPSymbol() const;
+ MCSymbol *getTOCOffsetSymbol() const;
};
} // end of namespace llvm
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 2dc0d82..a9d2e88 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -99,6 +99,11 @@ protected:
break;
}
+ // Don't really need to save data to the stack - the clobbered
+ // registers are already saved when the SDNode (e.g. PPCaddiTlsgdLAddr)
+ // gets translated to the pseudo instruction (e.g. ADDItlsgdLADDR).
+ BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0);
+
// Expand into two ops built prior to the existing instruction.
MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3)
.addReg(InReg);
@@ -113,6 +118,8 @@ protected:
.addReg(GPR3));
Call->addOperand(MI->getOperand(3));
+ BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0);
+
BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg)
.addReg(GPR3);
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index 733027a..05006ac 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -83,7 +83,6 @@ static bool IsIntegerCC(unsigned CC)
return (CC <= SPCC::ICC_VC);
}
-
static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
{
switch(CC) {
@@ -124,106 +123,103 @@ static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
llvm_unreachable("Invalid cond code");
}
-bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const
-{
-
- MachineBasicBlock::iterator I = MBB.end();
- MachineBasicBlock::iterator UnCondBrIter = MBB.end();
- while (I != MBB.begin()) {
- --I;
+static bool isUncondBranchOpcode(int Opc) { return Opc == SP::BA; }
- if (I->isDebugValue())
- continue;
+static bool isCondBranchOpcode(int Opc) {
+ return Opc == SP::FBCOND || Opc == SP::BCOND;
+}
- // When we see a non-terminator, we are done.
- if (!isUnpredicatedTerminator(I))
- break;
+static bool isIndirectBranchOpcode(int Opc) {
+ return Opc == SP::BINDrr || Opc == SP::BINDri;
+}
- // Terminator is not a branch.
- if (!I->isBranch())
- return true;
+static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
+ SmallVectorImpl<MachineOperand> &Cond) {
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOperand(1).getImm()));
+ Target = LastInst->getOperand(0).getMBB();
+}
- // Handle Unconditional branches.
- if (I->getOpcode() == SP::BA) {
- UnCondBrIter = I;
+bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
+ return false;
+
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+ unsigned LastOpc = LastInst->getOpcode();
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (isUncondBranchOpcode(LastOpc)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (isCondBranchOpcode(LastOpc)) {
+ // Block ends with fall-through condbranch.
+ parseCondBranch(LastInst, TBB, Cond);
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
- if (!AllowModify) {
- TBB = I->getOperand(0).getMBB();
- continue;
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If AllowModify is true and the block ends with two or more unconditional
+ // branches, delete all but the first unconditional branch.
+ if (AllowModify && isUncondBranchOpcode(LastOpc)) {
+ while (isUncondBranchOpcode(SecondLastOpc)) {
+ LastInst->eraseFromParent();
+ LastInst = SecondLastInst;
+ LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ // Return now the only terminator is an unconditional branch.
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else {
+ SecondLastInst = I;
+ SecondLastOpc = SecondLastInst->getOpcode();
}
+ }
+ }
- while (std::next(I) != MBB.end())
- std::next(I)->eraseFromParent();
-
- Cond.clear();
- FBB = nullptr;
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
- if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
- TBB = nullptr;
- I->eraseFromParent();
- I = MBB.end();
- UnCondBrIter = MBB.end();
- continue;
- }
+ // If the block ends with a B and a Bcc, handle it.
+ if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ parseCondBranch(SecondLastInst, TBB, Cond);
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
- TBB = I->getOperand(0).getMBB();
- continue;
- }
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed.
+ if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ return false;
+ }
- unsigned Opcode = I->getOpcode();
- if (Opcode != SP::BCOND && Opcode != SP::FBCOND)
- return true; // Unknown Opcode.
-
- SPCC::CondCodes BranchCode = (SPCC::CondCodes)I->getOperand(1).getImm();
-
- if (Cond.empty()) {
- MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
- if (AllowModify && UnCondBrIter != MBB.end() &&
- MBB.isLayoutSuccessor(TargetBB)) {
-
- // Transform the code
- //
- // brCC L1
- // ba L2
- // L1:
- // ..
- // L2:
- //
- // into
- //
- // brnCC L2
- // L1:
- // ...
- // L2:
- //
- BranchCode = GetOppositeBranchCondition(BranchCode);
- MachineBasicBlock::iterator OldInst = I;
- BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(Opcode))
- .addMBB(UnCondBrIter->getOperand(0).getMBB()).addImm(BranchCode);
- BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(SP::BA))
- .addMBB(TargetBB);
-
- OldInst->eraseFromParent();
- UnCondBrIter->eraseFromParent();
-
- UnCondBrIter = MBB.end();
- I = MBB.end();
- continue;
- }
- FBB = TBB;
- TBB = I->getOperand(0).getMBB();
- Cond.push_back(MachineOperand::CreateImm(BranchCode));
- continue;
- }
- // FIXME: Handle subsequent conditional branches.
- // For now, we can't handle multiple conditional branches.
+ // ...likewise if it ends with an indirect branch followed by an unconditional
+ // branch.
+ if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
return true;
}
- return false;
+
+ // Otherwise, can't handle this.
+ return true;
}
unsigned
@@ -277,6 +273,14 @@ unsigned SparcInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
return Count;
}
+bool SparcInstrInfo::ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 1);
+ SPCC::CondCodes CC = static_cast<SPCC::CondCodes>(Cond[0].getImm());
+ Cond[0].setImm(GetOppositeBranchCondition(CC));
+ return false;
+}
+
void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
index 15673f1..9de624c 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
@@ -76,6 +76,9 @@ public:
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
DebugLoc DL) const override;
+ bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+
void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index b9f2eb5..d5dabc2 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1219,6 +1219,9 @@ def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>;
// Atomic operations
//===----------------------------------------------------------------------===//
+// A serialization instruction that acts as a barrier for all memory
+// accesses, which expands to "bcr 14, 0".
+let hasSideEffects = 1 in
def Serialize : Alias<2, (outs), (ins), [(z_serialize)]>;
let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in {
diff --git a/contrib/llvm/lib/Target/WebAssembly/Disassembler/CMakeLists.txt b/contrib/llvm/lib/Target/WebAssembly/Disassembler/CMakeLists.txt
new file mode 100644
index 0000000..5e55e29
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/Disassembler/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMWebAssemblyDisassembler
+ WebAssemblyDisassembler.cpp
+ )
diff --git a/contrib/llvm/lib/Target/WebAssembly/Disassembler/LLVMBuild.txt b/contrib/llvm/lib/Target/WebAssembly/Disassembler/LLVMBuild.txt
new file mode 100644
index 0000000..a452ca1
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/Disassembler/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===-- ./lib/Target/WebAssembly/Disassembler/LLVMBuild.txt -----*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = WebAssemblyDisassembler
+parent = WebAssembly
+required_libraries = MCDisassembler WebAssemblyInfo Support
+add_to_library_groups = WebAssembly
diff --git a/contrib/llvm/lib/Target/WebAssembly/Disassembler/Makefile b/contrib/llvm/lib/Target/WebAssembly/Disassembler/Makefile
new file mode 100644
index 0000000..bcd36ba
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/Disassembler/Makefile
@@ -0,0 +1,16 @@
+##===-- lib/Target/WebAssembly/Disassembler/Makefile -------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMWebAssemblyDisassembler
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
new file mode 100644
index 0000000..0143b10
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -0,0 +1,148 @@
+//==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file is part of the WebAssembly Disassembler.
+///
+/// It contains code to translate the data produced by the decoder into
+/// MCInsts.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-disassembler"
+
+namespace {
+class WebAssemblyDisassembler final : public MCDisassembler {
+ std::unique_ptr<const MCInstrInfo> MCII;
+
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
+
+public:
+ WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
+ std::unique_ptr<const MCInstrInfo> MCII)
+ : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {}
+};
+} // end anonymous namespace
+
+static MCDisassembler *createWebAssemblyDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo());
+ return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII));
+}
+
+extern "C" void LLVMInitializeWebAssemblyDisassembler() {
+ // Register the disassembler for each target.
+ TargetRegistry::RegisterMCDisassembler(TheWebAssemblyTarget32,
+ createWebAssemblyDisassembler);
+ TargetRegistry::RegisterMCDisassembler(TheWebAssemblyTarget64,
+ createWebAssemblyDisassembler);
+}
+
+MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
+ MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
+ raw_ostream &OS, raw_ostream &CS) const {
+ Size = 0;
+ uint64_t Pos = 0;
+
+ // Read the opcode.
+ if (Pos + sizeof(uint64_t) > Bytes.size())
+ return MCDisassembler::Fail;
+ uint64_t Opcode = support::endian::read64le(Bytes.data() + Pos);
+ Pos += sizeof(uint64_t);
+
+ if (Opcode >= WebAssembly::INSTRUCTION_LIST_END)
+ return MCDisassembler::Fail;
+
+ MI.setOpcode(Opcode);
+ const MCInstrDesc &Desc = MCII->get(Opcode);
+ unsigned NumFixedOperands = Desc.NumOperands;
+
+ // If it's variadic, read the number of extra operands.
+ unsigned NumExtraOperands = 0;
+ if (Desc.isVariadic()) {
+ if (Pos + sizeof(uint64_t) > Bytes.size())
+ return MCDisassembler::Fail;
+ NumExtraOperands = support::endian::read64le(Bytes.data() + Pos);
+ Pos += sizeof(uint64_t);
+ }
+
+ // Read the fixed operands. These are described by the MCInstrDesc.
+ for (unsigned i = 0; i < NumFixedOperands; ++i) {
+ const MCOperandInfo &Info = Desc.OpInfo[i];
+ switch (Info.OperandType) {
+ case MCOI::OPERAND_IMMEDIATE:
+ case WebAssembly::OPERAND_BASIC_BLOCK: {
+ if (Pos + sizeof(uint64_t) > Bytes.size())
+ return MCDisassembler::Fail;
+ uint64_t Imm = support::endian::read64le(Bytes.data() + Pos);
+ Pos += sizeof(uint64_t);
+ MI.addOperand(MCOperand::createImm(Imm));
+ break;
+ }
+ case MCOI::OPERAND_REGISTER: {
+ if (Pos + sizeof(uint64_t) > Bytes.size())
+ return MCDisassembler::Fail;
+ uint64_t Reg = support::endian::read64le(Bytes.data() + Pos);
+ Pos += sizeof(uint64_t);
+ MI.addOperand(MCOperand::createReg(Reg));
+ break;
+ }
+ case WebAssembly::OPERAND_FPIMM: {
+ // TODO: MC converts all floating point immediate operands to double.
+ // This is fine for numeric values, but may cause NaNs to change bits.
+ if (Pos + sizeof(uint64_t) > Bytes.size())
+ return MCDisassembler::Fail;
+ uint64_t Bits = support::endian::read64le(Bytes.data() + Pos);
+ Pos += sizeof(uint64_t);
+ double Imm;
+ memcpy(&Imm, &Bits, sizeof(Imm));
+ MI.addOperand(MCOperand::createFPImm(Imm));
+ break;
+ }
+ default:
+ llvm_unreachable("unimplemented operand kind");
+ }
+ }
+
+ // Read the extra operands.
+ assert(NumExtraOperands == 0 || Desc.isVariadic());
+ for (unsigned i = 0; i < NumExtraOperands; ++i) {
+ if (Pos + sizeof(uint64_t) > Bytes.size())
+ return MCDisassembler::Fail;
+ if (Desc.TSFlags & WebAssemblyII::VariableOpIsImmediate) {
+ // Decode extra immediate operands.
+ uint64_t Imm = support::endian::read64le(Bytes.data() + Pos);
+ MI.addOperand(MCOperand::createImm(Imm));
+ } else {
+ // Decode extra register operands.
+ uint64_t Reg = support::endian::read64le(Bytes.data() + Pos);
+ MI.addOperand(MCOperand::createReg(Reg));
+ }
+ Pos += sizeof(uint64_t);
+ }
+
+ Size = Pos;
+ return MCDisassembler::Success;
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
index 7ce3a00..9a95150 100644
--- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
@@ -16,6 +16,8 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -33,7 +35,7 @@ using namespace llvm;
WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI)
- : MCInstPrinter(MAI, MII, MRI) {}
+ : MCInstPrinter(MAI, MII, MRI), ControlFlowCounter(0) {}
void WebAssemblyInstPrinter::printRegName(raw_ostream &OS,
unsigned RegNo) const {
@@ -59,6 +61,52 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
// Print any added annotation.
printAnnotation(OS, Annot);
+
+ if (CommentStream) {
+ // Observe any effects on the control flow stack, for use in annotating
+ // control flow label references.
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case WebAssembly::LOOP: {
+ // Grab the TopLabel value first so that labels print in numeric order.
+ uint64_t TopLabel = ControlFlowCounter++;
+ ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false));
+ printAnnotation(OS, "label" + utostr(TopLabel) + ':');
+ ControlFlowStack.push_back(std::make_pair(TopLabel, true));
+ break;
+ }
+ case WebAssembly::BLOCK:
+ ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false));
+ break;
+ case WebAssembly::END_LOOP:
+ ControlFlowStack.pop_back();
+ printAnnotation(
+ OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':');
+ break;
+ case WebAssembly::END_BLOCK:
+ printAnnotation(
+ OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':');
+ break;
+ }
+
+ // Annotate any control flow label references.
+ unsigned NumFixedOperands = Desc.NumOperands;
+ SmallSet<uint64_t, 8> Printed;
+ for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
+ const MCOperandInfo &Info = Desc.OpInfo[i];
+ if (!(i < NumFixedOperands
+ ? (Info.OperandType == WebAssembly::OPERAND_BASIC_BLOCK)
+ : (Desc.TSFlags & WebAssemblyII::VariableOpImmediateIsLabel)))
+ continue;
+ uint64_t Depth = MI->getOperand(i).getImm();
+ if (!Printed.insert(Depth).second)
+ continue;
+ const auto &Pair = ControlFlowStack.rbegin()[Depth];
+ printAnnotation(OS, utostr(Depth) + ": " + (Pair.second ? "up" : "down") +
+ " to label" + utostr(Pair.first));
+ }
+ }
}
static std::string toString(const APFloat &FP) {
@@ -82,6 +130,9 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
+ assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() ||
+ MII.get(MI->getOpcode()).TSFlags == 0) &&
+ "WebAssembly variable_ops register ops don't use TSFlags");
unsigned WAReg = Op.getReg();
if (int(WAReg) >= 0)
printRegName(O, WAReg);
@@ -95,19 +146,27 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (OpNo < MII.get(MI->getOpcode()).getNumDefs())
O << '=';
} else if (Op.isImm()) {
- switch (MI->getOpcode()) {
- case WebAssembly::PARAM:
- case WebAssembly::RESULT:
- case WebAssembly::LOCAL:
- O << WebAssembly::TypeToString(MVT::SimpleValueType(Op.getImm()));
- break;
- default:
- O << Op.getImm();
- break;
- }
- } else if (Op.isFPImm())
+ assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() ||
+ (MII.get(MI->getOpcode()).TSFlags &
+ WebAssemblyII::VariableOpIsImmediate)) &&
+ "WebAssemblyII::VariableOpIsImmediate should be set for "
+ "variable_ops immediate ops");
+ // TODO: (MII.get(MI->getOpcode()).TSFlags &
+ // WebAssemblyII::VariableOpImmediateIsLabel)
+ // can tell us whether this is an immediate referencing a label in the
+ // control flow stack, and it may be nice to pretty-print.
+ O << Op.getImm();
+ } else if (Op.isFPImm()) {
+ assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() ||
+ MII.get(MI->getOpcode()).TSFlags == 0) &&
+ "WebAssembly variable_ops floating point ops don't use TSFlags");
O << toString(APFloat(Op.getFPImm()));
- else {
+ } else {
+ assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() ||
+ (MII.get(MI->getOpcode()).TSFlags &
+ WebAssemblyII::VariableOpIsImmediate)) &&
+ "WebAssemblyII::VariableOpIsImmediate should be set for "
+ "variable_ops expr ops");
assert(Op.isExpr() && "unknown operand kind in printOperand");
Op.getExpr()->print(O, &MAI);
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
index 39a16f5..cd6c59a 100644
--- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
+++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
@@ -23,6 +23,9 @@ namespace llvm {
class MCSubtargetInfo;
class WebAssemblyInstPrinter final : public MCInstPrinter {
+ uint64_t ControlFlowCounter;
+ SmallVector<std::pair<uint64_t, bool>, 0> ControlFlowStack;
+
public:
WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI);
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index b158ccb..bba06f6 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -95,9 +95,6 @@ WebAssemblyAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
}
} // end anonymous namespace
-MCAsmBackend *llvm::createWebAssemblyAsmBackend(const Target &T,
- const MCRegisterInfo &MRI,
- const Triple &TT,
- StringRef CPU) {
+MCAsmBackend *llvm::createWebAssemblyAsmBackend(const Triple &TT) {
return new WebAssemblyAsmBackend(TT.isArch64Bit());
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp
index c47a3d9..2bb58b3 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp
@@ -30,19 +30,31 @@ protected:
};
} // end anonymous namespace
-// FIXME: Use EM_NONE as a temporary hack. Should we decide to pursue ELF
-// writing seriously, we should email generic-abi@googlegroups.com and ask
-// for our own ELF code.
WebAssemblyELFObjectWriter::WebAssemblyELFObjectWriter(bool Is64Bit,
uint8_t OSABI)
- : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_NONE,
- /*HasRelocationAddend=*/true) {}
+ : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_WEBASSEMBLY,
+ /*HasRelocationAddend=*/false) {}
unsigned WebAssemblyELFObjectWriter::GetRelocType(const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
- // FIXME: Do we need our own relocs?
- return Fixup.getKind();
+ // WebAssembly functions are not allocated in the address space. To resolve a
+ // pointer to a function, we must use a special relocation type.
+ if (const MCSymbolRefExpr *SyExp =
+ dyn_cast<MCSymbolRefExpr>(Fixup.getValue()))
+ if (SyExp->getKind() == MCSymbolRefExpr::VK_WebAssembly_FUNCTION)
+ return ELF::R_WEBASSEMBLY_FUNCTION;
+
+ switch (Fixup.getKind()) {
+ case FK_Data_4:
+ assert(!is64Bit() && "4-byte relocations only supported on wasm32");
+ return ELF::R_WEBASSEMBLY_DATA;
+ case FK_Data_8:
+ assert(is64Bit() && "8-byte relocations only supported on wasm64");
+ return ELF::R_WEBASSEMBLY_DATA;
+ default:
+ llvm_unreachable("unimplemented fixup kind");
+ }
}
MCObjectWriter *llvm::createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS,
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
index d261779..02c717a 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
@@ -27,11 +27,12 @@ WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) {
// TODO: What should MaxInstLength be?
- PrivateGlobalPrefix = "";
- PrivateLabelPrefix = "";
-
UseDataRegionDirectives = true;
+ // Use .skip instead of .zero because .zero is confusing when used with two
+ // arguments (it doesn't actually zero things out).
+ ZeroDirective = "\t.skip\t";
+
Data8bitsDirective = "\t.int8\t";
Data16bitsDirective = "\t.int16\t";
Data32bitsDirective = "\t.int32\t";
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index 7c6c79e..f409bd7 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCFixup.h"
@@ -26,75 +27,66 @@ using namespace llvm;
#define DEBUG_TYPE "mccodeemitter"
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
+STATISTIC(MCNumFixups, "Number of MC fixups created.");
+
namespace {
class WebAssemblyMCCodeEmitter final : public MCCodeEmitter {
- const MCRegisterInfo &MRI;
-
-public:
- WebAssemblyMCCodeEmitter(const MCInstrInfo &, const MCRegisterInfo &mri,
- MCContext &)
- : MRI(mri) {}
+ const MCInstrInfo &MCII;
+ const MCContext &Ctx;
- ~WebAssemblyMCCodeEmitter() override {}
-
- /// TableGen'erated function for getting the binary encoding for an
- /// instruction.
+ // Implementation generated by tablegen.
uint64_t getBinaryCodeForInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- /// Return binary encoding of operand. If the machine operand requires
- /// relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- uint64_t getMemoryOpValue(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
+
+public:
+ WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : MCII(mcii), Ctx(ctx) {}
};
} // end anonymous namespace
MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
- return new WebAssemblyMCCodeEmitter(MCII, MRI, Ctx);
-}
-
-unsigned WebAssemblyMCCodeEmitter::getMachineOpValue(
- const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- if (MO.isReg())
- return MRI.getEncodingValue(MO.getReg());
- if (MO.isImm())
- return static_cast<unsigned>(MO.getImm());
-
- assert(MO.isExpr());
-
- assert(MO.getExpr()->getKind() == MCExpr::SymbolRef);
-
- assert(false && "FIXME: not implemented yet");
-
- return 0;
+ return new WebAssemblyMCCodeEmitter(MCII, Ctx);
}
void WebAssemblyMCCodeEmitter::encodeInstruction(
const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- assert(false && "FIXME: not implemented yet");
-}
-
-// Encode WebAssembly Memory Operand
-uint64_t
-WebAssemblyMCCodeEmitter::getMemoryOpValue(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- assert(false && "FIXME: not implemented yet");
- return 0;
+ // FIXME: This is not the real binary encoding. This is an extremely
+ // over-simplified encoding where we just use uint64_t for everything. This
+ // is a temporary measure.
+ support::endian::Writer<support::little>(OS).write<uint64_t>(MI.getOpcode());
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ if (Desc.isVariadic())
+ support::endian::Writer<support::little>(OS).write<uint64_t>(
+ MI.getNumOperands() - Desc.NumOperands);
+ for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ support::endian::Writer<support::little>(OS).write<uint64_t>(MO.getReg());
+ } else if (MO.isImm()) {
+ support::endian::Writer<support::little>(OS).write<uint64_t>(MO.getImm());
+ } else if (MO.isFPImm()) {
+ support::endian::Writer<support::little>(OS).write<double>(MO.getFPImm());
+ } else if (MO.isExpr()) {
+ support::endian::Writer<support::little>(OS).write<uint64_t>(0);
+ Fixups.push_back(MCFixup::create(
+ (1 + MCII.get(MI.getOpcode()).isVariadic() + i) * sizeof(uint64_t),
+ MO.getExpr(), STI.getTargetTriple().isArch64Bit() ? FK_Data_8 : FK_Data_4,
+ MI.getLoc()));
+ ++MCNumFixups;
+ } else {
+ llvm_unreachable("unexpected operand kind");
+ }
+ }
+
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
}
#include "WebAssemblyGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index 14cd295..37000f1 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -15,10 +15,10 @@
#include "WebAssemblyMCTargetDesc.h"
#include "InstPrinter/WebAssemblyInstPrinter.h"
#include "WebAssemblyMCAsmInfo.h"
+#include "WebAssemblyTargetStreamer.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
@@ -35,52 +35,89 @@ using namespace llvm;
#define GET_REGINFO_MC_DESC
#include "WebAssemblyGenRegisterInfo.inc"
-static MCAsmInfo *createWebAssemblyMCAsmInfo(const MCRegisterInfo & /*MRI*/,
- const Triple &TT) {
+static MCAsmInfo *createMCAsmInfo(const MCRegisterInfo & /*MRI*/,
+ const Triple &TT) {
return new WebAssemblyMCAsmInfo(TT);
}
-static MCInstrInfo *createWebAssemblyMCInstrInfo() {
+static MCInstrInfo *createMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitWebAssemblyMCInstrInfo(X);
return X;
}
-static MCStreamer *createWebAssemblyMCStreamer(const Triple &T, MCContext &Ctx,
- MCAsmBackend &MAB,
- raw_pwrite_stream &OS,
- MCCodeEmitter *Emitter,
- bool RelaxAll) {
- return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+static MCRegisterInfo *createMCRegisterInfo(const Triple & /*T*/) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitWebAssemblyMCRegisterInfo(X, 0);
+ return X;
}
-static MCInstPrinter *
-createWebAssemblyMCInstPrinter(const Triple & /*T*/, unsigned SyntaxVariant,
- const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI) {
+static MCInstPrinter *createMCInstPrinter(const Triple & /*T*/,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI) {
assert(SyntaxVariant == 0);
return new WebAssemblyInstPrinter(MAI, MII, MRI);
}
+static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo & /*MRI*/,
+ MCContext &Ctx) {
+ return createWebAssemblyMCCodeEmitter(MCII, Ctx);
+}
+
+static MCAsmBackend *createAsmBackend(const Target & /*T*/,
+ const MCRegisterInfo & /*MRI*/,
+ const Triple &TT, StringRef /*CPU*/) {
+ return createWebAssemblyAsmBackend(TT);
+}
+
+static MCSubtargetInfo *createMCSubtargetInfo(const Triple &TT, StringRef CPU,
+ StringRef FS) {
+ return createWebAssemblyMCSubtargetInfoImpl(TT, CPU, FS);
+}
+
+static MCTargetStreamer *
+createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo & /*STI*/) {
+ return new WebAssemblyTargetELFStreamer(S);
+}
+
+static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter * /*InstPrint*/,
+ bool /*isVerboseAsm*/) {
+ return new WebAssemblyTargetAsmStreamer(S, OS);
+}
+
// Force static initialization.
extern "C" void LLVMInitializeWebAssemblyTargetMC() {
for (Target *T : {&TheWebAssemblyTarget32, &TheWebAssemblyTarget64}) {
// Register the MC asm info.
- RegisterMCAsmInfoFn X(*T, createWebAssemblyMCAsmInfo);
+ RegisterMCAsmInfoFn X(*T, createMCAsmInfo);
// Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(*T, createWebAssemblyMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(*T, createMCInstrInfo);
- // Register the object streamer
- TargetRegistry::RegisterELFStreamer(*T, createWebAssemblyMCStreamer);
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createMCRegisterInfo);
// Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(*T, createWebAssemblyMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(*T, createMCInstPrinter);
+
+ // Register the MC code emitter.
+ TargetRegistry::RegisterMCCodeEmitter(*T, createCodeEmitter);
+
+ // Register the ASM Backend.
+ TargetRegistry::RegisterMCAsmBackend(*T, createAsmBackend);
- // Register the MC code emitter
- TargetRegistry::RegisterMCCodeEmitter(*T, createWebAssemblyMCCodeEmitter);
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createMCSubtargetInfo);
- // Register the ASM Backend
- TargetRegistry::RegisterMCAsmBackend(*T, createWebAssemblyAsmBackend);
+ // Register the object target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(*T,
+ createObjectTargetStreamer);
+ // Register the asm target streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createAsmTargetStreamer);
}
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index e78f73e..9bac4f8 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -15,40 +15,62 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/DataTypes.h"
namespace llvm {
-class formatted_raw_ostream;
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
-class MCRegisterInfo;
class MCObjectWriter;
-class MCStreamer;
class MCSubtargetInfo;
-class MCTargetStreamer;
-class StringRef;
class Target;
class Triple;
-class raw_ostream;
class raw_pwrite_stream;
extern Target TheWebAssemblyTarget32;
extern Target TheWebAssemblyTarget64;
MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
-MCAsmBackend *createWebAssemblyAsmBackend(const Target &T,
- const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+MCAsmBackend *createWebAssemblyAsmBackend(const Triple &TT);
MCObjectWriter *createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS,
bool Is64Bit, uint8_t OSABI);
+namespace WebAssembly {
+enum OperandType {
+ /// Basic block label in a branch construct.
+ OPERAND_BASIC_BLOCK = MCOI::OPERAND_FIRST_TARGET,
+ /// Floating-point immediate.
+ OPERAND_FPIMM
+};
+
+/// WebAssembly-specific directive identifiers.
+enum Directive {
+ // FIXME: This is not the real binary encoding.
+ DotParam = UINT64_MAX - 0, ///< .param
+ DotResult = UINT64_MAX - 1, ///< .result
+ DotLocal = UINT64_MAX - 2, ///< .local
+ DotEndFunc = UINT64_MAX - 3, ///< .endfunc
+};
+
+} // end namespace WebAssembly
+
+namespace WebAssemblyII {
+enum {
+ // For variadic instructions, this flag indicates whether an operand
+ // in the variable_ops range is an immediate value.
+ VariableOpIsImmediate = (1 << 0),
+ // For immediate values in the variable_ops range, this flag indicates
+ // whether the value represents a control-flow label.
+ VariableOpImmediateIsLabel = (1 << 1),
+};
+} // end namespace WebAssemblyII
+
} // end namespace llvm
// Defines symbolic names for WebAssembly registers. This defines a mapping from
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
new file mode 100644
index 0000000..1d28228
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
@@ -0,0 +1,94 @@
+//==-- WebAssemblyTargetStreamer.cpp - WebAssembly Target Streamer Methods --=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines WebAssembly-specific target streamer classes.
+/// These are for implementing support for target-specific assembly directives.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyTargetStreamer.h"
+#include "InstPrinter/WebAssemblyInstPrinter.h"
+#include "WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyTargetObjectFile.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+WebAssemblyTargetStreamer::WebAssemblyTargetStreamer(MCStreamer &S)
+ : MCTargetStreamer(S) {}
+
+WebAssemblyTargetAsmStreamer::WebAssemblyTargetAsmStreamer(
+ MCStreamer &S, formatted_raw_ostream &OS)
+ : WebAssemblyTargetStreamer(S), OS(OS) {}
+
+WebAssemblyTargetELFStreamer::WebAssemblyTargetELFStreamer(MCStreamer &S)
+ : WebAssemblyTargetStreamer(S) {}
+
+static void PrintTypes(formatted_raw_ostream &OS, ArrayRef<MVT> Types) {
+ bool First = true;
+ for (MVT Type : Types) {
+ if (First)
+ First = false;
+ else
+ OS << ", ";
+ OS << WebAssembly::TypeToString(Type);
+ }
+ OS << '\n';
+}
+
+void WebAssemblyTargetAsmStreamer::emitParam(ArrayRef<MVT> Types) {
+ OS << "\t.param \t";
+ PrintTypes(OS, Types);
+}
+
+void WebAssemblyTargetAsmStreamer::emitResult(ArrayRef<MVT> Types) {
+ OS << "\t.result \t";
+ PrintTypes(OS, Types);
+}
+
+void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef<MVT> Types) {
+ OS << "\t.local \t";
+ PrintTypes(OS, Types);
+}
+
+void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; }
+
+// FIXME: What follows is not the real binary encoding.
+
+static void EncodeTypes(MCStreamer &Streamer, ArrayRef<MVT> Types) {
+ Streamer.EmitIntValue(Types.size(), sizeof(uint64_t));
+ for (MVT Type : Types)
+ Streamer.EmitIntValue(Type.SimpleTy, sizeof(uint64_t));
+}
+
+void WebAssemblyTargetELFStreamer::emitParam(ArrayRef<MVT> Types) {
+ Streamer.EmitIntValue(WebAssembly::DotParam, sizeof(uint64_t));
+ EncodeTypes(Streamer, Types);
+}
+
+void WebAssemblyTargetELFStreamer::emitResult(ArrayRef<MVT> Types) {
+ Streamer.EmitIntValue(WebAssembly::DotResult, sizeof(uint64_t));
+ EncodeTypes(Streamer, Types);
+}
+
+void WebAssemblyTargetELFStreamer::emitLocal(ArrayRef<MVT> Types) {
+ Streamer.EmitIntValue(WebAssembly::DotLocal, sizeof(uint64_t));
+ EncodeTypes(Streamer, Types);
+}
+
+void WebAssemblyTargetELFStreamer::emitEndFunc() {
+ Streamer.EmitIntValue(WebAssembly::DotEndFunc, sizeof(uint64_t));
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
new file mode 100644
index 0000000..c66a515
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -0,0 +1,68 @@
+//==-- WebAssemblyTargetStreamer.h - WebAssembly Target Streamer -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares WebAssembly-specific target streamer classes.
+/// These are for implementing support for target-specific assembly directives.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H
+
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+
+class MCELFStreamer;
+
+/// WebAssembly-specific streamer interface, to implement support
+/// WebAssembly-specific assembly directives.
+class WebAssemblyTargetStreamer : public MCTargetStreamer {
+public:
+ explicit WebAssemblyTargetStreamer(MCStreamer &S);
+
+ /// .param
+ virtual void emitParam(ArrayRef<MVT> Types) = 0;
+ /// .result
+ virtual void emitResult(ArrayRef<MVT> Types) = 0;
+ /// .local
+ virtual void emitLocal(ArrayRef<MVT> Types) = 0;
+ /// .endfunc
+ virtual void emitEndFunc() = 0;
+};
+
+/// This part is for ascii assembly output
+class WebAssemblyTargetAsmStreamer final : public WebAssemblyTargetStreamer {
+ formatted_raw_ostream &OS;
+
+public:
+ WebAssemblyTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
+
+ void emitParam(ArrayRef<MVT> Types) override;
+ void emitResult(ArrayRef<MVT> Types) override;
+ void emitLocal(ArrayRef<MVT> Types) override;
+ void emitEndFunc() override;
+};
+
+/// This part is for ELF object output
+class WebAssemblyTargetELFStreamer final : public WebAssemblyTargetStreamer {
+public:
+ explicit WebAssemblyTargetELFStreamer(MCStreamer &S);
+
+ void emitParam(ArrayRef<MVT> Types) override;
+ void emitResult(ArrayRef<MVT> Types) override;
+ void emitLocal(ArrayRef<MVT> Types) override;
+ void emitEndFunc() override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 0d2b4d9..45ac99d 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -17,6 +17,7 @@
#include "WebAssembly.h"
#include "InstPrinter/WebAssemblyInstPrinter.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
#include "WebAssemblyMCInstLower.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblyRegisterInfo.h"
@@ -69,7 +70,9 @@ private:
void EmitJumpTableInfo() override;
void EmitConstantPool() override;
void EmitFunctionBodyStart() override;
+ void EmitFunctionBodyEnd() override;
void EmitInstruction(const MachineInstr *MI) override;
+ const MCExpr *lowerConstant(const Constant *CV) override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &OS) override;
@@ -80,6 +83,7 @@ private:
MVT getRegType(unsigned RegNo) const;
const char *toString(MVT VT) const;
std::string regToString(const MachineOperand &MO);
+ WebAssemblyTargetStreamer *getTargetStreamer();
};
} // end anonymous namespace
@@ -90,9 +94,9 @@ private:
MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const {
const TargetRegisterClass *TRC =
- TargetRegisterInfo::isVirtualRegister(RegNo) ?
- MRI->getRegClass(RegNo) :
- MRI->getTargetRegisterInfo()->getMinimalPhysRegClass(RegNo);
+ TargetRegisterInfo::isVirtualRegister(RegNo)
+ ? MRI->getRegClass(RegNo)
+ : MRI->getTargetRegisterInfo()->getMinimalPhysRegClass(RegNo);
for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
if (TRC->hasType(T))
return T;
@@ -101,6 +105,10 @@ MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const {
return MVT::Other;
}
+const char *WebAssemblyAsmPrinter::toString(MVT VT) const {
+ return WebAssembly::TypeToString(VT);
+}
+
std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) {
unsigned RegNo = MO.getReg();
assert(TargetRegisterInfo::isVirtualRegister(RegNo) &&
@@ -111,8 +119,10 @@ std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) {
return '$' + utostr(WAReg);
}
-const char *WebAssemblyAsmPrinter::toString(MVT VT) const {
- return WebAssembly::TypeToString(VT);
+WebAssemblyTargetStreamer *
+WebAssemblyAsmPrinter::getTargetStreamer() {
+ MCTargetStreamer *TS = OutStreamer->getTargetStreamer();
+ return static_cast<WebAssemblyTargetStreamer *>(TS);
}
//===----------------------------------------------------------------------===//
@@ -145,29 +155,20 @@ static void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM,
}
void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
- if (!MFI->getParams().empty()) {
- MCInst Param;
- Param.setOpcode(WebAssembly::PARAM);
- for (MVT VT : MFI->getParams())
- Param.addOperand(MCOperand::createImm(VT.SimpleTy));
- EmitToStreamer(*OutStreamer, Param);
- }
+ if (!MFI->getParams().empty())
+ getTargetStreamer()->emitParam(MFI->getParams());
SmallVector<MVT, 4> ResultVTs;
const Function &F(*MF->getFunction());
ComputeLegalValueVTs(F, TM, F.getReturnType(), ResultVTs);
+
// If the return type needs to be legalized it will get converted into
// passing a pointer.
- if (ResultVTs.size() == 1) {
- MCInst Result;
- Result.setOpcode(WebAssembly::RESULT);
- Result.addOperand(MCOperand::createImm(ResultVTs.front().SimpleTy));
- EmitToStreamer(*OutStreamer, Result);
- }
+ if (ResultVTs.size() == 1)
+ getTargetStreamer()->emitResult(ResultVTs);
bool AnyWARegs = false;
- MCInst Local;
- Local.setOpcode(WebAssembly::LOCAL);
+ SmallVector<MVT, 16> LocalTypes;
for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) {
unsigned VReg = TargetRegisterInfo::index2VirtReg(Idx);
unsigned WAReg = MFI->getWAReg(VReg);
@@ -180,22 +181,26 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
// Don't declare stackified registers.
if (int(WAReg) < 0)
continue;
- Local.addOperand(MCOperand::createImm(getRegType(VReg).SimpleTy));
+ LocalTypes.push_back(getRegType(VReg));
AnyWARegs = true;
}
auto &PhysRegs = MFI->getPhysRegs();
for (unsigned PReg = 0; PReg < PhysRegs.size(); ++PReg) {
if (PhysRegs[PReg] == -1U)
continue;
- Local.addOperand(MCOperand::createImm(getRegType(PReg).SimpleTy));
+ LocalTypes.push_back(getRegType(PReg));
AnyWARegs = true;
}
if (AnyWARegs)
- EmitToStreamer(*OutStreamer, Local);
+ getTargetStreamer()->emitLocal(LocalTypes);
AsmPrinter::EmitFunctionBodyStart();
}
+void WebAssemblyAsmPrinter::EmitFunctionBodyEnd() {
+ getTargetStreamer()->emitEndFunc();
+}
+
void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
DEBUG(dbgs() << "EmitInstruction: " << *MI << '\n');
@@ -207,10 +212,6 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// These represent values which are live into the function entry, so there's
// no instruction to emit.
break;
- case WebAssembly::LOOP_END:
- // This is a no-op which just exists to tell AsmPrinter.cpp that there's a
- // fallthrough which nevertheless requires a label for the destination here.
- break;
default: {
WebAssemblyMCInstLower MCInstLowering(OutContext, *this);
MCInst TmpInst;
@@ -221,6 +222,14 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
+const MCExpr *WebAssemblyAsmPrinter::lowerConstant(const Constant *CV) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
+ if (GV->getValueType()->isFunctionTy())
+ return MCSymbolRefExpr::create(
+ getSymbol(GV), MCSymbolRefExpr::VK_WebAssembly_FUNCTION, OutContext);
+ return AsmPrinter::lowerConstant(CV);
+}
+
bool WebAssemblyAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
unsigned OpNo, unsigned AsmVariant,
const char *ExtraCode,
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index e9671ee..a39349c 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -34,6 +34,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -256,7 +257,8 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) {
/// code) for a branch instruction to both branch to a block and fallthrough
/// to it, so we check the actual branch operands to see if there are any
/// explicit mentions.
-static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred, MachineBasicBlock *MBB) {
+static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred,
+ MachineBasicBlock *MBB) {
for (MachineInstr &MI : Pred->terminators())
for (MachineOperand &MO : MI.explicit_operands())
if (MO.isMBB() && MO.getMBB() == MBB)
@@ -325,13 +327,21 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF,
InsertPos = Header->getFirstTerminator();
while (InsertPos != Header->begin() &&
prev(InsertPos)->definesRegister(WebAssembly::EXPR_STACK) &&
- prev(InsertPos)->getOpcode() != WebAssembly::LOOP)
+ prev(InsertPos)->getOpcode() != WebAssembly::LOOP &&
+ prev(InsertPos)->getOpcode() != WebAssembly::END_BLOCK &&
+ prev(InsertPos)->getOpcode() != WebAssembly::END_LOOP)
--InsertPos;
}
// Add the BLOCK.
- BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK))
- .addMBB(&MBB);
+ BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK));
+
+ // Mark the end of the block.
+ InsertPos = MBB.begin();
+ while (InsertPos != MBB.end() &&
+ InsertPos->getOpcode() == WebAssembly::END_LOOP)
+ ++InsertPos;
+ BuildMI(MBB, InsertPos, DebugLoc(), TII.get(WebAssembly::END_BLOCK));
// Track the farthest-spanning scope that ends at this point.
int Number = MBB.getNumber();
@@ -341,10 +351,11 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF,
}
/// Insert a LOOP marker for a loop starting at MBB (if it's a loop header).
-static void PlaceLoopMarker(MachineBasicBlock &MBB, MachineFunction &MF,
- SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
- const WebAssemblyInstrInfo &TII,
- const MachineLoopInfo &MLI) {
+static void PlaceLoopMarker(
+ MachineBasicBlock &MBB, MachineFunction &MF,
+ SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
+ DenseMap<const MachineInstr *, const MachineBasicBlock *> &LoopTops,
+ const WebAssemblyInstrInfo &TII, const MachineLoopInfo &MLI) {
MachineLoop *Loop = MLI.getLoopFor(&MBB);
if (!Loop || Loop->getHeader() != &MBB)
return;
@@ -361,14 +372,19 @@ static void PlaceLoopMarker(MachineBasicBlock &MBB, MachineFunction &MF,
Iter = next(MachineFunction::iterator(Bottom));
}
MachineBasicBlock *AfterLoop = &*Iter;
- BuildMI(MBB, MBB.begin(), DebugLoc(), TII.get(WebAssembly::LOOP))
- .addMBB(AfterLoop);
- // Emit a special no-op telling the asm printer that we need a label to close
- // the loop scope, even though the destination is only reachable by
- // fallthrough.
- if (!Bottom->back().isBarrier())
- BuildMI(*Bottom, Bottom->end(), DebugLoc(), TII.get(WebAssembly::LOOP_END));
+ // Mark the beginning of the loop (after the end of any existing loop that
+ // ends here).
+ auto InsertPos = MBB.begin();
+ while (InsertPos != MBB.end() &&
+ InsertPos->getOpcode() == WebAssembly::END_LOOP)
+ ++InsertPos;
+ BuildMI(MBB, InsertPos, DebugLoc(), TII.get(WebAssembly::LOOP));
+
+ // Mark the end of the loop.
+ MachineInstr *End = BuildMI(*AfterLoop, AfterLoop->begin(), DebugLoc(),
+ TII.get(WebAssembly::END_LOOP));
+ LoopTops[End] = &MBB;
assert((!ScopeTops[AfterLoop->getNumber()] ||
ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) &&
@@ -377,6 +393,19 @@ static void PlaceLoopMarker(MachineBasicBlock &MBB, MachineFunction &MF,
ScopeTops[AfterLoop->getNumber()] = &MBB;
}
+static unsigned
+GetDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack,
+ const MachineBasicBlock *MBB) {
+ unsigned Depth = 0;
+ for (auto X : reverse(Stack)) {
+ if (X == MBB)
+ break;
+ ++Depth;
+ }
+ assert(Depth < Stack.size() && "Branch destination should be in scope");
+ return Depth;
+}
+
/// Insert LOOP and BLOCK markers at appropriate places.
static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
const WebAssemblyInstrInfo &TII,
@@ -388,25 +417,57 @@ static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
// we may insert at the end.
SmallVector<MachineBasicBlock *, 8> ScopeTops(MF.getNumBlockIDs() + 1);
+ // For eacn LOOP_END, the corresponding LOOP.
+ DenseMap<const MachineInstr *, const MachineBasicBlock *> LoopTops;
+
for (auto &MBB : MF) {
// Place the LOOP for MBB if MBB is the header of a loop.
- PlaceLoopMarker(MBB, MF, ScopeTops, TII, MLI);
+ PlaceLoopMarker(MBB, MF, ScopeTops, LoopTops, TII, MLI);
// Place the BLOCK for MBB if MBB is branched to from above.
PlaceBlockMarker(MBB, MF, ScopeTops, TII, MLI, MDT);
}
-}
-#ifndef NDEBUG
-static bool
-IsOnStack(const SmallVectorImpl<std::pair<MachineBasicBlock *, bool>> &Stack,
- const MachineBasicBlock *MBB) {
- for (const auto &Pair : Stack)
- if (Pair.first == MBB)
- return true;
- return false;
+ // Now rewrite references to basic blocks to be depth immediates.
+ SmallVector<const MachineBasicBlock *, 8> Stack;
+ for (auto &MBB : reverse(MF)) {
+ for (auto &MI : reverse(MBB)) {
+ switch (MI.getOpcode()) {
+ case WebAssembly::BLOCK:
+ assert(ScopeTops[Stack.back()->getNumber()] == &MBB &&
+ "Block should be balanced");
+ Stack.pop_back();
+ break;
+ case WebAssembly::LOOP:
+ assert(Stack.back() == &MBB && "Loop top should be balanced");
+ Stack.pop_back();
+ Stack.pop_back();
+ break;
+ case WebAssembly::END_BLOCK:
+ Stack.push_back(&MBB);
+ break;
+ case WebAssembly::END_LOOP:
+ Stack.push_back(&MBB);
+ Stack.push_back(LoopTops[&MI]);
+ break;
+ default:
+ if (MI.isTerminator()) {
+ // Rewrite MBB operands to be depth immediates.
+ SmallVector<MachineOperand, 4> Ops(MI.operands());
+ while (MI.getNumOperands() > 0)
+ MI.RemoveOperand(MI.getNumOperands() - 1);
+ for (auto MO : Ops) {
+ if (MO.isMBB())
+ MO = MachineOperand::CreateImm(GetDepth(Stack, MO.getMBB()));
+ MI.addOperand(MF, MO);
+ }
+ }
+ break;
+ }
+ }
+ }
+ assert(Stack.empty() && "Control flow should be balanced");
}
-#endif
bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** CFG Stackifying **********\n"
@@ -415,7 +476,9 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
const auto &MLI = getAnalysis<MachineLoopInfo>();
auto &MDT = getAnalysis<MachineDominatorTree>();
+ // Liveness is not tracked for EXPR_STACK physreg.
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ MF.getRegInfo().invalidateLiveness();
// RPO sorting needs all loops to be single-entry.
EliminateMultipleEntryLoops(MF, MLI);
@@ -426,43 +489,5 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
// Place the BLOCK and LOOP markers to indicate the beginnings of scopes.
PlaceMarkers(MF, MLI, TII, MDT);
-#ifndef NDEBUG
- // Verify that block and loop beginnings and endings are in LIFO order, and
- // that all references to blocks are to blocks on the stack at the point of
- // the reference.
- SmallVector<std::pair<MachineBasicBlock *, bool>, 0> Stack;
- for (auto &MBB : MF) {
- while (!Stack.empty() && Stack.back().first == &MBB)
- if (Stack.back().second) {
- assert(Stack.size() >= 2);
- Stack.pop_back();
- Stack.pop_back();
- } else {
- assert(Stack.size() >= 1);
- Stack.pop_back();
- }
- for (auto &MI : MBB)
- switch (MI.getOpcode()) {
- case WebAssembly::LOOP:
- Stack.push_back(std::make_pair(&MBB, false));
- Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), true));
- break;
- case WebAssembly::BLOCK:
- Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), false));
- break;
- default:
- // Verify that all referenced blocks are in scope. A reference to a
- // block with a negative number is invalid, but can happen with inline
- // asm, so we shouldn't assert on it, but instead let CodeGen properly
- // fail on it.
- for (const MachineOperand &MO : MI.explicit_operands())
- if (MO.isMBB() && MO.getMBB()->getNumber() >= 0)
- assert(IsOnStack(Stack, MO.getMBB()));
- break;
- }
- }
- assert(Stack.empty());
-#endif
-
return true;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 7a89f78..e9933b0 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -573,7 +573,8 @@ SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
SDLoc DL(Op);
const auto *GA = cast<GlobalAddressSDNode>(Op);
EVT VT = Op.getValueType();
- assert(GA->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
+ assert(GA->getTargetFlags() == 0 &&
+ "Unexpected target flags on generic GlobalAddressSDNode");
if (GA->getAddressSpace() != 0)
fail(DL, DAG, "WebAssembly only expects the 0 address space");
return DAG.getNode(
@@ -587,9 +588,16 @@ WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
SDLoc DL(Op);
const auto *ES = cast<ExternalSymbolSDNode>(Op);
EVT VT = Op.getValueType();
- assert(ES->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
+ assert(ES->getTargetFlags() == 0 &&
+ "Unexpected target flags on generic ExternalSymbolSDNode");
+ // Set the TargetFlags to 0x1 which indicates that this is a "function"
+ // symbol rather than a data symbol. We do this unconditionally even though
+ // we don't know anything about the symbol other than its name, because all
+ // external symbols used in target-independent SelectionDAG code are for
+ // functions.
return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
- DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
+ DAG.getTargetExternalSymbol(ES->getSymbol(), VT,
+ /*TargetFlags=*/0x1));
}
SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index 05efe89..fda9595 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -41,28 +41,33 @@ let Defs = [ARGUMENTS] in {
// TODO: SelectionDAG's lowering insists on using a pointer as the index for
// jump tables, so in practice we don't ever use TABLESWITCH_I64 in wasm32 mode
// currently.
+// Set TSFlags{0} to 1 to indicate that the variable_ops are immediates.
+// Set TSFlags{1} to 1 to indicate that the immediates represent labels.
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
def TABLESWITCH_I32 : I<(outs), (ins I32:$index, bb_op:$default, variable_ops),
[(WebAssemblytableswitch I32:$index, bb:$default)],
- "tableswitch\t$index, $default">;
+ "tableswitch\t$index, $default"> {
+ let TSFlags{0} = 1;
+ let TSFlags{1} = 1;
+}
def TABLESWITCH_I64 : I<(outs), (ins I64:$index, bb_op:$default, variable_ops),
[(WebAssemblytableswitch I64:$index, bb:$default)],
- "tableswitch\t$index, $default">;
+ "tableswitch\t$index, $default"> {
+ let TSFlags{0} = 1;
+ let TSFlags{1} = 1;
+}
} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
-// Placemarkers to indicate the start of a block or loop scope. These
+// Placemarkers to indicate the start or end of a block or loop scope. These
// use/clobber EXPR_STACK to prevent them from being moved into the middle of
// an expression tree.
let Uses = [EXPR_STACK], Defs = [EXPR_STACK] in {
-def BLOCK : I<(outs), (ins bb_op:$dst), [], "block \t$dst">;
-def LOOP : I<(outs), (ins bb_op:$dst), [], "loop \t$dst">;
+def BLOCK : I<(outs), (ins), [], "block">;
+def LOOP : I<(outs), (ins), [], "loop">;
+def END_BLOCK : I<(outs), (ins), [], "end_block">;
+def END_LOOP : I<(outs), (ins), [], "end_loop">;
} // Uses = [EXPR_STACK], Defs = [EXPR_STACK]
-// No-op to indicate to the AsmPrinter that a loop ends here, so a
-// basic block label is needed even if it wouldn't otherwise appear so.
-let isTerminator = 1, hasCtrlDep = 1 in
-def LOOP_END : I<(outs), (ins), []>;
-
multiclass RETURN<WebAssemblyRegClass vt> {
def RETURN_#vt : I<(outs), (ins vt:$val), [(WebAssemblyreturn vt:$val)],
"return \t$val">;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index 5e7663c..028e9af 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -74,6 +74,9 @@ bool WebAssemblyInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
case WebAssembly::BR_IF:
if (HaveCond)
return true;
+ // If we're running after CFGStackify, we can't optimize further.
+ if (!MI.getOperand(1).isMBB())
+ return true;
Cond.push_back(MachineOperand::CreateImm(true));
Cond.push_back(MI.getOperand(0));
TBB = MI.getOperand(1).getMBB();
@@ -82,12 +85,18 @@ bool WebAssemblyInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
case WebAssembly::BR_UNLESS:
if (HaveCond)
return true;
+ // If we're running after CFGStackify, we can't optimize further.
+ if (!MI.getOperand(1).isMBB())
+ return true;
Cond.push_back(MachineOperand::CreateImm(false));
Cond.push_back(MI.getOperand(0));
TBB = MI.getOperand(1).getMBB();
HaveCond = true;
break;
case WebAssembly::BR:
+ // If we're running after CFGStackify, we can't optimize further.
+ if (!MI.getOperand(0).isMBB())
+ return true;
if (!HaveCond)
TBB = MI.getOperand(0).getMBB();
else
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index f0b4ce7..2e682a4 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -66,8 +66,18 @@ def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper",
// WebAssembly-specific Operands.
//===----------------------------------------------------------------------===//
+let OperandNamespace = "WebAssembly" in {
+
+let OperandType = "OPERAND_BASIC_BLOCK" in
def bb_op : Operand<OtherVT>;
+let OperandType = "OPERAND_FPIMM" in {
+def f32imm_op : Operand<f32>;
+def f64imm_op : Operand<f64>;
+} // OperandType = "OPERAND_FPIMM"
+
+} // OperandNamespace = "WebAssembly"
+
//===----------------------------------------------------------------------===//
// WebAssembly Instruction Format Definitions.
//===----------------------------------------------------------------------===//
@@ -120,31 +130,20 @@ def CONST_I32 : I<(outs I32:$res), (ins i32imm:$imm),
def CONST_I64 : I<(outs I64:$res), (ins i64imm:$imm),
[(set I64:$res, imm:$imm)],
"i64.const\t$res, $imm">;
-def CONST_F32 : I<(outs F32:$res), (ins f32imm:$imm),
+def CONST_F32 : I<(outs F32:$res), (ins f32imm_op:$imm),
[(set F32:$res, fpimm:$imm)],
"f32.const\t$res, $imm">;
-def CONST_F64 : I<(outs F64:$res), (ins f64imm:$imm),
+def CONST_F64 : I<(outs F64:$res), (ins f64imm_op:$imm),
[(set F64:$res, fpimm:$imm)],
"f64.const\t$res, $imm">;
} // isMoveImm = 1
} // Defs = [ARGUMENTS]
-def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$dst)),
- (CONST_I32 tglobaladdr:$dst)>;
-def : Pat<(i32 (WebAssemblywrapper texternalsym:$dst)),
- (CONST_I32 texternalsym:$dst)>;
-def : Pat<(i32 (WebAssemblywrapper tjumptable:$dst)),
- (CONST_I32 tjumptable:$dst)>;
-
-let Defs = [ARGUMENTS] in {
-
-// Function signature and local variable declaration "instructions".
-def PARAM : I<(outs), (ins variable_ops), [], ".param \t">;
-def RESULT : I<(outs), (ins variable_ops), [], ".result \t">;
-def LOCAL : I<(outs), (ins variable_ops), [], ".local \t">;
-
-} // Defs = [ARGUMENTS]
+def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)),
+ (CONST_I32 tglobaladdr:$addr)>;
+def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)),
+ (CONST_I32 texternalsym:$addr)>;
//===----------------------------------------------------------------------===//
// Additional sets of instructions.
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index 74ec45d..b39ac52 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -24,10 +24,25 @@
// WebAssembly constant offsets are performed as unsigned with infinite
// precision, so we need to check for NoUnsignedWrap so that we don't fold an
// offset for an add that needs wrapping.
-def regPlusImm : PatFrag<(ops node:$off, node:$addr),
+def regPlusImm : PatFrag<(ops node:$addr, node:$off),
(add node:$addr, node:$off),
[{ return N->getFlags()->hasNoUnsignedWrap(); }]>;
+// GlobalAddresses are conceptually unsigned values, so we can also fold them
+// into immediate values as long as their offsets are non-negative.
+def regPlusGA : PatFrag<(ops node:$addr, node:$off),
+ (add node:$addr, node:$off),
+ [{
+ return N->getFlags()->hasNoUnsignedWrap() ||
+ (N->getOperand(1)->getOpcode() == WebAssemblyISD::Wrapper &&
+ isa<GlobalAddressSDNode>(N->getOperand(1)->getOperand(0)) &&
+ cast<GlobalAddressSDNode>(N->getOperand(1)->getOperand(0))
+ ->getOffset() >= 0);
+}]>;
+
+// We don't need a regPlusES because external symbols never have constant
+// offsets folded into them, so we can just use add.
+
let Defs = [ARGUMENTS] in {
// Basic load.
@@ -49,29 +64,33 @@ def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr)>;
def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr)>;
// Select loads with a constant offset.
-def : Pat<(i32 (load (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i32 (load (regPlusImm I32:$addr, imm:$off))),
(LOAD_I32 imm:$off, $addr)>;
-def : Pat<(i64 (load (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i64 (load (regPlusImm I32:$addr, imm:$off))),
(LOAD_I64 imm:$off, $addr)>;
-def : Pat<(f32 (load (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(f32 (load (regPlusImm I32:$addr, imm:$off))),
(LOAD_F32 imm:$off, $addr)>;
-def : Pat<(f64 (load (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(f64 (load (regPlusImm I32:$addr, imm:$off))),
(LOAD_F64 imm:$off, $addr)>;
-def : Pat<(i32 (load (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i32 (load (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD_I32 tglobaladdr:$off, $addr)>;
-def : Pat<(i64 (load (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i64 (load (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD_I64 tglobaladdr:$off, $addr)>;
-def : Pat<(f32 (load (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(f32 (load (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD_F32 tglobaladdr:$off, $addr)>;
-def : Pat<(f64 (load (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(f64 (load (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD_F64 tglobaladdr:$off, $addr)>;
-def : Pat<(i32 (load (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
(LOAD_I32 texternalsym:$off, $addr)>;
-def : Pat<(i64 (load (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
(LOAD_I64 texternalsym:$off, $addr)>;
-def : Pat<(f32 (load (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(f32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
(LOAD_F32 texternalsym:$off, $addr)>;
-def : Pat<(f64 (load (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(f64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
(LOAD_F64 texternalsym:$off, $addr)>;
// Select loads with just a constant offset.
@@ -135,65 +154,85 @@ def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr)>;
def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>;
// Select extending loads with a constant offset.
-def : Pat<(i32 (sextloadi8 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i32 (sextloadi8 (regPlusImm I32:$addr, imm:$off))),
(LOAD8_S_I32 imm:$off, $addr)>;
-def : Pat<(i32 (zextloadi8 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i32 (zextloadi8 (regPlusImm I32:$addr, imm:$off))),
(LOAD8_U_I32 imm:$off, $addr)>;
-def : Pat<(i32 (sextloadi16 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i32 (sextloadi16 (regPlusImm I32:$addr, imm:$off))),
(LOAD16_S_I32 imm:$off, $addr)>;
-def : Pat<(i32 (zextloadi16 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i32 (zextloadi16 (regPlusImm I32:$addr, imm:$off))),
(LOAD16_U_I32 imm:$off, $addr)>;
-def : Pat<(i64 (sextloadi8 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i64 (sextloadi8 (regPlusImm I32:$addr, imm:$off))),
(LOAD8_S_I64 imm:$off, $addr)>;
-def : Pat<(i64 (zextloadi8 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i64 (zextloadi8 (regPlusImm I32:$addr, imm:$off))),
(LOAD8_U_I64 imm:$off, $addr)>;
-def : Pat<(i64 (sextloadi16 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i64 (sextloadi16 (regPlusImm I32:$addr, imm:$off))),
(LOAD16_S_I64 imm:$off, $addr)>;
-def : Pat<(i64 (zextloadi16 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i64 (zextloadi16 (regPlusImm I32:$addr, imm:$off))),
(LOAD16_U_I64 imm:$off, $addr)>;
-def : Pat<(i64 (sextloadi32 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i64 (sextloadi32 (regPlusImm I32:$addr, imm:$off))),
(LOAD32_S_I64 imm:$off, $addr)>;
-def : Pat<(i64 (zextloadi32 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i64 (zextloadi32 (regPlusImm I32:$addr, imm:$off))),
(LOAD32_U_I64 imm:$off, $addr)>;
-def : Pat<(i32 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i32 (sextloadi8 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD8_S_I32 tglobaladdr:$off, $addr)>;
-def : Pat<(i32 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i32 (zextloadi8 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD8_U_I32 tglobaladdr:$off, $addr)>;
-def : Pat<(i32 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i32 (sextloadi16 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD16_S_I32 tglobaladdr:$off, $addr)>;
-def : Pat<(i32 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i32 (zextloadi16 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD16_U_I32 tglobaladdr:$off, $addr)>;
-def : Pat<(i64 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i64 (sextloadi8 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD8_S_I64 tglobaladdr:$off, $addr)>;
-def : Pat<(i64 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i64 (zextloadi8 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD8_U_I64 tglobaladdr:$off, $addr)>;
-def : Pat<(i64 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i64 (sextloadi16 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD16_S_I64 tglobaladdr:$off, $addr)>;
-def : Pat<(i64 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i64 (zextloadi16 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD16_U_I64 tglobaladdr:$off, $addr)>;
-def : Pat<(i64 (sextloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i64 (sextloadi32 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD32_S_I64 tglobaladdr:$off, $addr)>;
-def : Pat<(i64 (zextloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i64 (zextloadi32 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD32_U_I64 tglobaladdr:$off, $addr)>;
-def : Pat<(i32 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i32 (sextloadi8 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD8_S_I32 texternalsym:$off, $addr)>;
-def : Pat<(i32 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i32 (zextloadi8 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD8_U_I32 texternalsym:$off, $addr)>;
-def : Pat<(i32 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i32 (sextloadi16 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD16_S_I32 texternalsym:$off, $addr)>;
-def : Pat<(i32 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i32 (zextloadi16 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD16_U_I32 texternalsym:$off, $addr)>;
-def : Pat<(i64 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i64 (sextloadi8 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD8_S_I64 texternalsym:$off, $addr)>;
-def : Pat<(i64 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i64 (zextloadi8 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD8_U_I64 texternalsym:$off, $addr)>;
-def : Pat<(i64 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i64 (sextloadi16 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD16_S_I64 texternalsym:$off, $addr)>;
-def : Pat<(i64 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i64 (zextloadi16 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD16_U_I64 texternalsym:$off, $addr)>;
-def : Pat<(i64 (sextloadi32 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i64 (sextloadi32 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD32_S_I64 texternalsym:$off, $addr)>;
-def : Pat<(i64 (zextloadi32 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i64 (zextloadi32 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD32_U_I64 texternalsym:$off, $addr)>;
// Select extending loads with just a constant offset.
@@ -259,35 +298,45 @@ def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>;
def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>;
// Select "don't care" extending loads with a constant offset.
-def : Pat<(i32 (extloadi8 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i32 (extloadi8 (regPlusImm I32:$addr, imm:$off))),
(LOAD8_U_I32 imm:$off, $addr)>;
-def : Pat<(i32 (extloadi16 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i32 (extloadi16 (regPlusImm I32:$addr, imm:$off))),
(LOAD16_U_I32 imm:$off, $addr)>;
-def : Pat<(i64 (extloadi8 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i64 (extloadi8 (regPlusImm I32:$addr, imm:$off))),
(LOAD8_U_I64 imm:$off, $addr)>;
-def : Pat<(i64 (extloadi16 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i64 (extloadi16 (regPlusImm I32:$addr, imm:$off))),
(LOAD16_U_I64 imm:$off, $addr)>;
-def : Pat<(i64 (extloadi32 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i64 (extloadi32 (regPlusImm I32:$addr, imm:$off))),
(LOAD32_U_I64 imm:$off, $addr)>;
-def : Pat<(i32 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i32 (extloadi8 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD8_U_I32 tglobaladdr:$off, $addr)>;
-def : Pat<(i32 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i32 (extloadi16 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD16_U_I32 tglobaladdr:$off, $addr)>;
-def : Pat<(i64 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i64 (extloadi8 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD8_U_I64 tglobaladdr:$off, $addr)>;
-def : Pat<(i64 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i64 (extloadi16 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD16_U_I64 tglobaladdr:$off, $addr)>;
-def : Pat<(i64 (extloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i64 (extloadi32 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD32_U_I64 tglobaladdr:$off, $addr)>;
-def : Pat<(i32 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i32 (extloadi8 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD8_U_I32 texternalsym:$off, $addr)>;
-def : Pat<(i32 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i32 (extloadi16 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD16_U_I32 texternalsym:$off, $addr)>;
-def : Pat<(i64 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i64 (extloadi8 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD8_U_I64 texternalsym:$off, $addr)>;
-def : Pat<(i64 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i64 (extloadi16 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD16_U_I64 texternalsym:$off, $addr)>;
-def : Pat<(i64 (extloadi32 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i64 (extloadi32 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD32_U_I64 texternalsym:$off, $addr)>;
// Select "don't care" extending loads with just a constant offset.
@@ -343,29 +392,37 @@ def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, F32:$val)>;
def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, F64:$val)>;
// Select stores with a constant offset.
-def : Pat<(store I32:$val, (regPlusImm imm:$off, I32:$addr)),
+def : Pat<(store I32:$val, (regPlusImm I32:$addr, imm:$off)),
(STORE_I32 imm:$off, I32:$addr, I32:$val)>;
-def : Pat<(store I64:$val, (regPlusImm imm:$off, I32:$addr)),
+def : Pat<(store I64:$val, (regPlusImm I32:$addr, imm:$off)),
(STORE_I64 imm:$off, I32:$addr, I64:$val)>;
-def : Pat<(store F32:$val, (regPlusImm imm:$off, I32:$addr)),
+def : Pat<(store F32:$val, (regPlusImm I32:$addr, imm:$off)),
(STORE_F32 imm:$off, I32:$addr, F32:$val)>;
-def : Pat<(store F64:$val, (regPlusImm imm:$off, I32:$addr)),
+def : Pat<(store F64:$val, (regPlusImm I32:$addr, imm:$off)),
(STORE_F64 imm:$off, I32:$addr, F64:$val)>;
-def : Pat<(store I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+def : Pat<(store I32:$val, (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off))),
(STORE_I32 tglobaladdr:$off, I32:$addr, I32:$val)>;
-def : Pat<(store I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+def : Pat<(store I64:$val, (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off))),
(STORE_I64 tglobaladdr:$off, I32:$addr, I64:$val)>;
-def : Pat<(store F32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+def : Pat<(store F32:$val, (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off))),
(STORE_F32 tglobaladdr:$off, I32:$addr, F32:$val)>;
-def : Pat<(store F64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+def : Pat<(store F64:$val, (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off))),
(STORE_F64 tglobaladdr:$off, I32:$addr, F64:$val)>;
-def : Pat<(store I32:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+def : Pat<(store I32:$val, (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off))),
(STORE_I32 texternalsym:$off, I32:$addr, I32:$val)>;
-def : Pat<(store I64:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+def : Pat<(store I64:$val, (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off))),
(STORE_I64 texternalsym:$off, I32:$addr, I64:$val)>;
-def : Pat<(store F32:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+def : Pat<(store F32:$val, (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off))),
(STORE_F32 texternalsym:$off, I32:$addr, F32:$val)>;
-def : Pat<(store F64:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+def : Pat<(store F64:$val, (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off))),
(STORE_F64 texternalsym:$off, I32:$addr, F64:$val)>;
// Select stores with just a constant offset.
@@ -423,35 +480,54 @@ def : Pat<(truncstorei32 I64:$val, I32:$addr),
(STORE32_I64 0, I32:$addr, I64:$val)>;
// Select truncating stores with a constant offset.
-def : Pat<(truncstorei8 I32:$val, (regPlusImm imm:$off, I32:$addr)),
+def : Pat<(truncstorei8 I32:$val, (regPlusImm I32:$addr, imm:$off)),
(STORE8_I32 imm:$off, I32:$addr, I32:$val)>;
-def : Pat<(truncstorei16 I32:$val, (regPlusImm imm:$off, I32:$addr)),
+def : Pat<(truncstorei16 I32:$val, (regPlusImm I32:$addr, imm:$off)),
(STORE16_I32 imm:$off, I32:$addr, I32:$val)>;
-def : Pat<(truncstorei8 I64:$val, (regPlusImm imm:$off, I32:$addr)),
+def : Pat<(truncstorei8 I64:$val, (regPlusImm I32:$addr, imm:$off)),
(STORE8_I64 imm:$off, I32:$addr, I64:$val)>;
-def : Pat<(truncstorei16 I64:$val, (regPlusImm imm:$off, I32:$addr)),
+def : Pat<(truncstorei16 I64:$val, (regPlusImm I32:$addr, imm:$off)),
(STORE16_I64 imm:$off, I32:$addr, I64:$val)>;
-def : Pat<(truncstorei32 I64:$val, (regPlusImm imm:$off, I32:$addr)),
+def : Pat<(truncstorei32 I64:$val, (regPlusImm I32:$addr, imm:$off)),
(STORE32_I64 imm:$off, I32:$addr, I64:$val)>;
-def : Pat<(truncstorei8 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+def : Pat<(truncstorei8 I32:$val,
+ (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off))),
(STORE8_I32 tglobaladdr:$off, I32:$addr, I32:$val)>;
-def : Pat<(truncstorei16 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+def : Pat<(truncstorei16 I32:$val,
+ (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off))),
(STORE16_I32 tglobaladdr:$off, I32:$addr, I32:$val)>;
-def : Pat<(truncstorei8 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+def : Pat<(truncstorei8 I64:$val,
+ (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off))),
(STORE8_I64 tglobaladdr:$off, I32:$addr, I64:$val)>;
-def : Pat<(truncstorei16 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+def : Pat<(truncstorei16 I64:$val,
+ (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off))),
(STORE16_I64 tglobaladdr:$off, I32:$addr, I64:$val)>;
-def : Pat<(truncstorei32 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+def : Pat<(truncstorei32 I64:$val,
+ (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off))),
(STORE32_I64 tglobaladdr:$off, I32:$addr, I64:$val)>;
-def : Pat<(truncstorei8 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+def : Pat<(truncstorei8 I32:$val, (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off))),
(STORE8_I32 texternalsym:$off, I32:$addr, I32:$val)>;
-def : Pat<(truncstorei16 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+def : Pat<(truncstorei16 I32:$val,
+ (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off))),
(STORE16_I32 texternalsym:$off, I32:$addr, I32:$val)>;
-def : Pat<(truncstorei8 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+def : Pat<(truncstorei8 I64:$val,
+ (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off))),
(STORE8_I64 texternalsym:$off, I32:$addr, I64:$val)>;
-def : Pat<(truncstorei16 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+def : Pat<(truncstorei16 I64:$val,
+ (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off))),
(STORE16_I64 texternalsym:$off, I32:$addr, I64:$val)>;
-def : Pat<(truncstorei32 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+def : Pat<(truncstorei32 I64:$val,
+ (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off))),
(STORE32_I64 texternalsym:$off, I32:$addr, I64:$val)>;
// Select truncating stores with just a constant offset.
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index a953f82..022a448 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -36,15 +36,17 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
}
-MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
- MCSymbol *Sym) const {
- assert(MO.getTargetFlags() == 0 && "WebAssembly does not use target flags");
+MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(MCSymbol *Sym,
+ int64_t Offset,
+ bool IsFunc) const {
+ MCSymbolRefExpr::VariantKind VK =
+ IsFunc ? MCSymbolRefExpr::VK_WebAssembly_FUNCTION
+ : MCSymbolRefExpr::VK_None;
+ const MCExpr *Expr = MCSymbolRefExpr::create(Sym, VK, Ctx);
- const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
-
- int64_t Offset = MO.getOffset();
if (Offset != 0) {
- assert(!MO.isJTI() && "Unexpected offset with jump table index");
+ if (IsFunc)
+ report_fatal_error("Function addresses with offsets not supported");
Expr =
MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, Ctx), Ctx);
}
@@ -64,6 +66,9 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
default:
MI->dump();
llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_MachineBasicBlock:
+ MI->dump();
+ llvm_unreachable("MachineBasicBlock operand should have been rewritten");
case MachineOperand::MO_Register: {
// Ignore all implicit register operands.
if (MO.isImplicit())
@@ -89,15 +94,19 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
llvm_unreachable("unknown floating point immediate type");
break;
}
- case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::createExpr(
- MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx));
- break;
case MachineOperand::MO_GlobalAddress:
- MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+ assert(MO.getTargetFlags() == 0 &&
+ "WebAssembly does not use target flags on GlobalAddresses");
+ MCOp = LowerSymbolOperand(GetGlobalAddressSymbol(MO), MO.getOffset(),
+ MO.getGlobal()->getValueType()->isFunctionTy());
break;
case MachineOperand::MO_ExternalSymbol:
- MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+ // The target flag indicates whether this is a symbol for a
+ // variable or a function.
+ assert((MO.getTargetFlags() & -2) == 0 &&
+ "WebAssembly uses only one target flag bit on ExternalSymbols");
+ MCOp = LowerSymbolOperand(GetExternalSymbolSymbol(MO), /*Offset=*/0,
+ MO.getTargetFlags() & 1);
break;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
index 6d70470..ab4ba1c 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
@@ -31,9 +31,10 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower {
MCContext &Ctx;
AsmPrinter &Printer;
- MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+ MCOperand LowerSymbolOperand(MCSymbol *Sym, int64_t Offset,
+ bool IsFunc) const;
public:
WebAssemblyMCInstLower(MCContext &ctx, AsmPrinter &printer)
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 89ef5cd..537c147 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -147,8 +147,10 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
// block boundaries, and the blocks aren't ordered so the block visitation
// order isn't significant, but we may want to change this in the future.
for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : reverse(MBB)) {
- MachineInstr *Insert = &MI;
+ // Don't use a range-based for loop, because we modify the list as we're
+ // iterating over it and the end iterator may change.
+ for (auto MII = MBB.rbegin(); MII != MBB.rend(); ++MII) {
+ MachineInstr *Insert = &*MII;
// Don't nest anything inside a phi.
if (Insert->getOpcode() == TargetOpcode::PHI)
break;
@@ -221,7 +223,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
Insert = Def;
}
if (AnyStackified)
- ImposeStackOrdering(&MI);
+ ImposeStackOrdering(&*MII);
}
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index dcada45..90d8dda 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -61,17 +61,23 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
MachineFunction &MF = *MBB.getParent();
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
const MachineFrameInfo& MFI = *MF.getFrameInfo();
- int FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex);
+ int64_t FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex);
if (MI.mayLoadOrStore()) {
// If this is a load or store, make it relative to SP and fold the frame
- // offset directly in
- assert(MI.getOperand(1).getImm() == 0 &&
- "Can't eliminate FI yet if offset is already set");
- MI.getOperand(1).setImm(FrameOffset);
+ // offset directly in.
+ assert(FrameOffset >= 0 && MI.getOperand(1).getImm() >= 0);
+ int64_t Offset = MI.getOperand(1).getImm() + FrameOffset;
+
+ if (static_cast<uint64_t>(Offset) > std::numeric_limits<uint32_t>::max()) {
+ // If this happens the program is invalid, but better to error here than
+ // generate broken code.
+ report_fatal_error("Memory offset field overflow");
+ }
+ MI.getOperand(1).setImm(Offset);
MI.getOperand(2).ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false);
} else {
- // Otherwise create an i32.add SP, offset and make it the operand
+ // Otherwise create an i32.add SP, offset and make it the operand.
auto &MRI = MF.getRegInfo();
const auto *TII = MF.getSubtarget().getInstrInfo();
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index e31ea46..b290b4b 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -45,8 +45,9 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT.isArch64Bit() ? "e-p:64:64-i64:64-n32:64-S128"
- : "e-p:32:32-i64:64-n32:64-S128",
+ : LLVMTargetMachine(T,
+ TT.isArch64Bit() ? "e-m:e-p:64:64-i64:64-n32:64-S128"
+ : "e-m:e-p:32:32-i64:64-n32:64-S128",
TT, CPU, FS, Options, RM, CM, OL),
TLOF(make_unique<WebAssemblyTargetObjectFile>()) {
// WebAssembly type-checks expressions, but a noreturn function with a return
diff --git a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
index 92ecde3..91b3fff 100644
--- a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
+++ b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
@@ -5,23 +5,6 @@
pr38151.c
va-arg-22.c
-# WebAssemblyRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator, int, unsigned int, llvm::RegScavenger *) const: Assertion `MI.getOperand(1).getImm() == 0 && "Can't eliminate FI yet if offset is already set"'
-20030313-1.c
-20030916-1.c
-20031012-1.c
-20041126-1.c
-20060420-1.c
-20071202-1.c
-20120808-1.c
-pr20527-1.c
-pr27073.c
-pr36339.c
-pr37573.c
-pr43236.c
-pr43835.c
-pr45070.c
-pr51933.c
-
# TargetRegisterInfo.h:315: static unsigned int llvm::TargetRegisterInfo::virtReg2Index(unsigned int): Assertion `isVirtualRegister(Reg) && "Not a virtual register"' failed.
struct-ret-1.c
va-arg-11.c
@@ -140,8 +123,6 @@ pr38051.c
pr39100.c
pr39339.c
-pr40022.c
-pr40657.c
pr43987.c
diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h
index fbec662..01e65b8 100644
--- a/contrib/llvm/lib/Target/X86/X86.h
+++ b/contrib/llvm/lib/Target/X86/X86.h
@@ -29,7 +29,7 @@ FunctionPass *createX86ISelDag(X86TargetMachine &TM,
CodeGenOpt::Level OptLevel);
/// This pass initializes a global base register for PIC on x86-32.
-FunctionPass* createX86GlobalBaseRegPass();
+FunctionPass *createX86GlobalBaseRegPass();
/// This pass combines multiple accesses to local-dynamic TLS variables so that
/// the TLS base address for the module is only fetched once per execution path
@@ -49,12 +49,13 @@ FunctionPass *createX86IssueVZeroUpperPass();
/// This will prevent a stall when returning on the Atom.
FunctionPass *createX86PadShortFunctions();
-/// Return a a pass that selectively replaces certain instructions (like add,
+/// Return a pass that selectively replaces certain instructions (like add,
/// sub, inc, dec, some shifts, and some multiplies) by equivalent LEA
/// instructions, in order to eliminate execution delays in some processors.
FunctionPass *createX86FixupLEAs();
-/// Return a pass that removes redundant address recalculations.
+/// Return a pass that removes redundant LEA instructions and redundant address
+/// recalculations.
FunctionPass *createX86OptimizeLEAs();
/// Return a pass that optimizes the code-size of x86 call sequences. This is
diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm/lib/Target/X86/X86CallingConv.td
index 54d88cb..e8b96e7 100644
--- a/contrib/llvm/lib/Target/X86/X86CallingConv.td
+++ b/contrib/llvm/lib/Target/X86/X86CallingConv.td
@@ -831,6 +831,12 @@ def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15,
def CSR_64_TLS_Darwin : CalleeSavedRegs<(add CSR_64, RCX, RDX, RSI,
R8, R9, R10, R11)>;
+// CSRs that are handled by prologue, epilogue.
+def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add)>;
+
+// CSRs that are handled explicitly via copies.
+def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(add CSR_64_TLS_Darwin)>;
+
// All GPRs - except r11
def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI,
R8, R9, R10, RSP)>;
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index 629d4d3..f48b479 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -1002,6 +1002,9 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
+ if (TLI.supportSplitCSR(FuncInfo.MF))
+ return false;
+
CallingConv::ID CC = F.getCallingConv();
if (CC != CallingConv::C &&
CC != CallingConv::Fast &&
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index d31aab0..1ec93b5 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -265,7 +265,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Without SSE, i64->f64 goes through memory.
setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
}
- }
+ } else if (!Subtarget->is64Bit())
+ setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
// Scalar integer divide and remainder are lowered to use operations that
// produce two results, to match the available instructions. This exposes
@@ -2310,6 +2311,18 @@ X86TargetLowering::LowerReturn(SDValue Chain,
DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
}
+ const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const MCPhysReg *I =
+ TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+ if (I) {
+ for (; *I; ++I) {
+ if (X86::GR64RegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::i64));
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+ }
+ }
+
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
@@ -3907,6 +3920,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::SHUFP:
+ case X86ISD::INSERTPS:
case X86ISD::PALIGNR:
case X86ISD::MOVLHPS:
case X86ISD::MOVLHPD:
@@ -4157,6 +4171,35 @@ static bool hasFPCMov(unsigned X86CC) {
}
}
+
+bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const {
+
+ const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
+ if (!IntrData)
+ return false;
+
+ switch (IntrData->Type) {
+ case LOADA:
+ case LOADU: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(I.getType());
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = (IntrData->Type == LOADA ? Info.memVT.getSizeInBits()/8 : 1);
+ Info.vol = false;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
+
/// Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
@@ -4743,8 +4786,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
/// uses one source. Note that this will set IsUnary for shuffles which use a
/// single input multiple times, and in those cases it will
/// adjust the mask to only have indices within that single input.
-/// FIXME: Add support for Decode*Mask functions that return SM_SentinelZero.
-static bool getTargetShuffleMask(SDNode *N, MVT VT,
+static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
SmallVectorImpl<int> &Mask, bool &IsUnary) {
unsigned NumElems = VT.getVectorNumElements();
SDValue ImmN;
@@ -4761,6 +4803,11 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
+ case X86ISD::INSERTPS:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
+ break;
case X86ISD::UNPCKH:
DecodeUNPCKHMask(VT, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
@@ -4870,10 +4917,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
case X86ISD::VPERM2X128:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
- // Mask only contains negative index if an element is zero.
- if (std::any_of(Mask.begin(), Mask.end(),
- [](int M){ return M == SM_SentinelZero; }))
- return false;
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVSLDUP:
DecodeMOVSLDUPMask(VT, Mask);
@@ -5008,6 +5052,12 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
if (Mask.empty())
return false;
+ // Check if we're getting a shuffle mask with zero'd elements.
+ if (!AllowSentinelZero)
+ if (std::any_of(Mask.begin(), Mask.end(),
+ [](int M){ return M == SM_SentinelZero; }))
+ return false;
+
// If we have a fake unary shuffle, the shuffle mask is spread across two
// inputs that are actually the same node. Re-map the mask to always point
// into the first input.
@@ -5046,19 +5096,19 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
// Recurse into target specific vector shuffles to find scalars.
if (isTargetShuffle(Opcode)) {
MVT ShufVT = V.getSimpleValueType();
- unsigned NumElems = ShufVT.getVectorNumElements();
+ int NumElems = (int)ShufVT.getVectorNumElements();
SmallVector<int, 16> ShuffleMask;
bool IsUnary;
- if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary))
+ if (!getTargetShuffleMask(N, ShufVT, false, ShuffleMask, IsUnary))
return SDValue();
int Elt = ShuffleMask[Index];
- if (Elt < 0)
+ if (Elt == SM_SentinelUndef)
return DAG.getUNDEF(ShufVT.getVectorElementType());
- SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0)
- : N->getOperand(1);
+ assert(0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range");
+ SDValue NewV = (Elt < NumElems) ? N->getOperand(0) : N->getOperand(1);
return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
Depth+1);
}
@@ -8165,6 +8215,13 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
DL, VT, V.getOperand(0), BroadcastIdx, Subtarget, DAG))
return TruncBroadcast;
+ MVT BroadcastVT = VT;
+
+ // Peek through any bitcast (only useful for loads).
+ SDValue BC = V;
+ while (BC.getOpcode() == ISD::BITCAST)
+ BC = BC.getOperand(0);
+
// Also check the simpler case, where we can directly reuse the scalar.
if (V.getOpcode() == ISD::BUILD_VECTOR ||
(V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
@@ -8174,13 +8231,17 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
// Only AVX2 has register broadcasts.
if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V))
return SDValue();
- } else if (MayFoldLoad(V) && !cast<LoadSDNode>(V)->isVolatile()) {
+ } else if (MayFoldLoad(BC) && !cast<LoadSDNode>(BC)->isVolatile()) {
+ // 32-bit targets need to load i64 as a f64 and then bitcast the result.
+ if (!Subtarget->is64Bit() && VT.getScalarType() == MVT::i64)
+ BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements());
+
// If we are broadcasting a load that is only used by the shuffle
// then we can reduce the vector load to the broadcasted scalar load.
- LoadSDNode *Ld = cast<LoadSDNode>(V);
+ LoadSDNode *Ld = cast<LoadSDNode>(BC);
SDValue BaseAddr = Ld->getOperand(1);
EVT AddrVT = BaseAddr.getValueType();
- EVT SVT = VT.getScalarType();
+ EVT SVT = BroadcastVT.getScalarType();
unsigned Offset = BroadcastIdx * SVT.getStoreSize();
SDValue NewAddr = DAG.getNode(
ISD::ADD, DL, AddrVT, BaseAddr,
@@ -8194,7 +8255,8 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
return SDValue();
}
- return DAG.getNode(X86ISD::VBROADCAST, DL, VT, V);
+ V = DAG.getNode(X86ISD::VBROADCAST, DL, BroadcastVT, V);
+ return DAG.getBitcast(VT, V);
}
// Check for whether we can use INSERTPS to perform the shuffle. We only use
@@ -12474,8 +12536,12 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
// location.
SDValue Chain = DAG.getEntryNode();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, DL, true), DL);
SDValue Args[] = { Chain, Offset };
Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args);
+ Chain =
+ DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
+ DAG.getIntPtrConstant(0, DL, true), SDValue(), DL);
// TLSCALL will be codegen'ed as call. Inform MFI that function has calls.
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
@@ -12648,13 +12714,21 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
return Op;
}
+ SDValue ValueToStore = Op.getOperand(0);
+ if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
+ !Subtarget->is64Bit())
+ // Bitcasting to f64 here allows us to do a single 64-bit store from
+ // an SSE register, avoiding the store forwarding penalty that would come
+ // with two 32-bit stores.
+ ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
+
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
auto PtrVT = getPointerTy(MF.getDataLayout());
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Chain = DAG.getStore(
- DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot,
+ DAG.getEntryNode(), dl, ValueToStore, StackSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), false,
false, 0);
return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
@@ -13027,7 +13101,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
}
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ SDValue ValueToStore = Op.getOperand(0);
+ if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget->is64Bit())
+ // Bitcasting to f64 here allows us to do a single 64-bit store from
+ // an SSE register, avoiding the store forwarding penalty that would come
+ // with two 32-bit stores.
+ ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, ValueToStore,
StackSlot, MachinePointerInfo(),
false, false, 0);
// For i64 source, we need to add the appropriate power of 2 if the input
@@ -17487,7 +17567,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getMergeValues(Results, dl);
}
case COMPRESS_TO_MEM: {
- SDLoc dl(Op);
SDValue Mask = Op.getOperand(4);
SDValue DataToCompress = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
@@ -17513,7 +17592,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
case TRUNCATE_TO_MEM_VI32:
return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i32);
case EXPAND_FROM_MEM: {
- SDLoc dl(Op);
SDValue Mask = Op.getOperand(4);
SDValue PassThru = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
@@ -17533,6 +17611,25 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
Mask, PassThru, Subtarget, DAG), Chain};
return DAG.getMergeValues(Results, dl);
}
+ case LOADU:
+ case LOADA: {
+ SDValue Mask = Op.getOperand(4);
+ SDValue PassThru = Op.getOperand(3);
+ SDValue Addr = Op.getOperand(2);
+ SDValue Chain = Op.getOperand(0);
+ MVT VT = Op.getSimpleValueType();
+
+ MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
+ assert(MemIntr && "Expected MemIntrinsicSDNode!");
+
+ if (isAllOnesConstant(Mask)) // return just a load
+ return DAG.getLoad(VT, dl, Chain, Addr, MemIntr->getMemOperand());
+
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+ return DAG.getMaskedLoad(VT, dl, Chain, Addr, VMask, PassThru, VT,
+ MemIntr->getMemOperand(), ISD::NON_EXTLOAD);
+ }
}
}
@@ -19512,24 +19609,37 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
MVT SrcVT = Op.getOperand(0).getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
- if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) {
+ if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
+ SrcVT == MVT::i64) {
assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
if (DstVT != MVT::f64)
// This conversion needs to be expanded.
return SDValue();
- SDValue InVec = Op->getOperand(0);
- SDLoc dl(Op);
- unsigned NumElts = SrcVT.getVectorNumElements();
- MVT SVT = SrcVT.getVectorElementType();
-
- // Widen the vector in input in the case of MVT::v2i32.
- // Example: from MVT::v2i32 to MVT::v4i32.
+ SDValue Op0 = Op->getOperand(0);
SmallVector<SDValue, 16> Elts;
- for (unsigned i = 0, e = NumElts; i != e; ++i)
- Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, InVec,
- DAG.getIntPtrConstant(i, dl)));
-
+ SDLoc dl(Op);
+ unsigned NumElts;
+ MVT SVT;
+ if (SrcVT.isVector()) {
+ NumElts = SrcVT.getVectorNumElements();
+ SVT = SrcVT.getVectorElementType();
+
+ // Widen the vector in input in the case of MVT::v2i32.
+ // Example: from MVT::v2i32 to MVT::v4i32.
+ for (unsigned i = 0, e = NumElts; i != e; ++i)
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, Op0,
+ DAG.getIntPtrConstant(i, dl)));
+ } else {
+ assert(SrcVT == MVT::i64 && !Subtarget->is64Bit() &&
+ "Unexpected source type in LowerBITCAST");
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0,
+ DAG.getIntPtrConstant(0, dl)));
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0,
+ DAG.getIntPtrConstant(1, dl)));
+ NumElts = 2;
+ SVT = MVT::i32;
+ }
// Explicitly mark the extra elements as Undef.
Elts.append(NumElts, DAG.getUNDEF(SVT));
@@ -20685,6 +20795,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VSHLI: return "X86ISD::VSHLI";
case X86ISD::VSRLI: return "X86ISD::VSRLI";
case X86ISD::VSRAI: return "X86ISD::VSRAI";
+ case X86ISD::VROTLI: return "X86ISD::VROTLI";
+ case X86ISD::VROTRI: return "X86ISD::VROTRI";
case X86ISD::CMPP: return "X86ISD::CMPP";
case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ";
case X86ISD::PCMPGT: return "X86ISD::PCMPGT";
@@ -23184,7 +23296,7 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
return false;
SmallVector<int, 16> OpMask;
bool IsUnary;
- bool HaveMask = getTargetShuffleMask(Op.getNode(), VT, OpMask, IsUnary);
+ bool HaveMask = getTargetShuffleMask(Op.getNode(), VT, true, OpMask, IsUnary);
// We only can combine unary shuffles which we can decode the mask for.
if (!HaveMask || !IsUnary)
return false;
@@ -23281,7 +23393,7 @@ static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
MVT VT = N.getSimpleValueType();
SmallVector<int, 4> Mask;
bool IsUnary;
- bool HaveMask = getTargetShuffleMask(N.getNode(), VT, Mask, IsUnary);
+ bool HaveMask = getTargetShuffleMask(N.getNode(), VT, false, Mask, IsUnary);
(void)HaveMask;
assert(HaveMask);
@@ -23854,6 +23966,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
SDValue InVec = N->getOperand(0);
SDValue EltNo = N->getOperand(1);
+ EVT EltVT = N->getValueType(0);
if (!isa<ConstantSDNode>(EltNo))
return SDValue();
@@ -23882,14 +23995,22 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
SmallVector<int, 16> ShuffleMask;
bool UnaryShuffle;
- if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(),
+ if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(), true,
ShuffleMask, UnaryShuffle))
return SDValue();
// Select the input vector, guarding against out of range extract vector.
unsigned NumElems = CurrentVT.getVectorNumElements();
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- int Idx = (Elt > (int)NumElems) ? -1 : ShuffleMask[Elt];
+ int Idx = (Elt > (int)NumElems) ? SM_SentinelUndef : ShuffleMask[Elt];
+
+ if (Idx == SM_SentinelZero)
+ return EltVT.isInteger() ? DAG.getConstant(0, SDLoc(N), EltVT)
+ : DAG.getConstantFP(+0.0, SDLoc(N), EltVT);
+ if (Idx == SM_SentinelUndef)
+ return DAG.getUNDEF(EltVT);
+
+ assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range");
SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0)
: InVec.getOperand(1);
@@ -23914,7 +24035,6 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
return SDValue();
- EVT EltVT = N->getValueType(0);
// If there's a bitcast before the shuffle, check if the load type and
// alignment is valid.
unsigned Align = LN0->getAlignment();
@@ -27233,6 +27353,32 @@ static SDValue promoteSextBeforeAddNSW(SDNode *Sext, SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewSext, NewConstant, &Flags);
}
+/// (i8,i32 {s/z}ext ({s/u}divrem (i8 x, i8 y)) ->
+/// (i8,i32 ({s/u}divrem_sext_hreg (i8 x, i8 y)
+/// This exposes the {s/z}ext to the sdivrem lowering, so that it directly
+/// extends from AH (which we otherwise need to do contortions to access).
+static SDValue getDivRem8(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ auto OpcodeN = N->getOpcode();
+ auto OpcodeN0 = N0.getOpcode();
+ if (!((OpcodeN == ISD::SIGN_EXTEND && OpcodeN0 == ISD::SDIVREM) ||
+ (OpcodeN == ISD::ZERO_EXTEND && OpcodeN0 == ISD::UDIVREM)))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ EVT InVT = N0.getValueType();
+ if (N0.getResNo() != 1 || InVT != MVT::i8 || VT != MVT::i32)
+ return SDValue();
+
+ SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
+ auto DivRemOpcode = OpcodeN0 == ISD::SDIVREM ? X86ISD::SDIVREM8_SEXT_HREG
+ : X86ISD::UDIVREM8_ZEXT_HREG;
+ SDValue R = DAG.getNode(DivRemOpcode, SDLoc(N), NodeTys, N0.getOperand(0),
+ N0.getOperand(1));
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
+ return R.getValue(1);
+}
+
static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
@@ -27243,18 +27389,8 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
EVT InSVT = InVT.getScalarType();
SDLoc DL(N);
- // (i8,i32 sext (sdivrem (i8 x, i8 y)) ->
- // (i8,i32 (sdivrem_sext_hreg (i8 x, i8 y)
- // This exposes the sext to the sdivrem lowering, so that it directly extends
- // from AH (which we otherwise need to do contortions to access).
- if (N0.getOpcode() == ISD::SDIVREM && N0.getResNo() == 1 &&
- InVT == MVT::i8 && VT == MVT::i32) {
- SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
- SDValue R = DAG.getNode(X86ISD::SDIVREM8_SEXT_HREG, DL, NodeTys,
- N0.getOperand(0), N0.getOperand(1));
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
- return R.getValue(1);
- }
+ if (SDValue DivRem8 = getDivRem8(N, DAG))
+ return DivRem8;
if (!DCI.isBeforeLegalizeOps()) {
if (InVT == MVT::i1) {
@@ -27413,19 +27549,8 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
- // (i8,i32 zext (udivrem (i8 x, i8 y)) ->
- // (i8,i32 (udivrem_zext_hreg (i8 x, i8 y)
- // This exposes the zext to the udivrem lowering, so that it directly extends
- // from AH (which we otherwise need to do contortions to access).
- if (N0.getOpcode() == ISD::UDIVREM &&
- N0.getResNo() == 1 && N0.getValueType() == MVT::i8 &&
- VT == MVT::i32) {
- SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
- SDValue R = DAG.getNode(X86ISD::UDIVREM8_ZEXT_HREG, dl, NodeTys,
- N0.getOperand(0), N0.getOperand(1));
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
- return R.getValue(1);
- }
+ if (SDValue DivRem8 = getDivRem8(N, DAG))
+ return DivRem8;
return SDValue();
}
@@ -27923,7 +28048,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FANDN: return PerformFANDNCombine(N, DAG, Subtarget);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
-// TODO: refactor the [SU]DIVREM8_[SZ]EXT_HREG code so that it's not duplicated.
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
@@ -28763,3 +28887,51 @@ bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
Attribute::MinSize);
return OptSize && !VT.isVector();
}
+
+void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
+ if (!Subtarget->is64Bit())
+ return;
+
+ // Update IsSplitCSR in X86MachineFunctionInfo.
+ X86MachineFunctionInfo *AFI =
+ Entry->getParent()->getInfo<X86MachineFunctionInfo>();
+ AFI->setIsSplitCSR(true);
+}
+
+void X86TargetLowering::insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
+ const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
+ if (!IStart)
+ return;
+
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+ for (const MCPhysReg *I = IStart; *I; ++I) {
+ const TargetRegisterClass *RC = nullptr;
+ if (X86::GR64RegClass.contains(*I))
+ RC = &X86::GR64RegClass;
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ // Create copy from CSR to a virtual register.
+ // FIXME: this currently does not emit CFI pseudo-instructions, it works
+ // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
+ // nounwind. If we want to generalize this later, we may need to emit
+ // CFI pseudo-instructions.
+ assert(Entry->getParent()->getFunction()->hasFnAttribute(
+ Attribute::NoUnwind) &&
+ "Function should be nounwind in insertCopiesSplitCSR!");
+ Entry->addLiveIn(*I);
+ BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+ NewVR)
+ .addReg(*I);
+
+ for (auto *Exit : Exits)
+ BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+ *I)
+ .addReg(NewVR);
+ }
+}
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
index 8bb0e5f..0ab786e 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -316,6 +316,9 @@ namespace llvm {
// Vector shift elements by immediate
VSHLI, VSRLI, VSRAI,
+ // Bit rotate by immediate
+ VROTLI, VROTRI,
+
// Vector packed double/float comparison.
CMPP,
@@ -837,6 +840,13 @@ namespace llvm {
/// from i32 to i8 but not from i32 to i16.
bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
+ /// Given an intrinsic, checks if on the target the intrinsic will need to map
+ /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
+ /// true and stores the intrinsic information into the IntrinsicInfo that was
+ /// passed to the function.
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+ unsigned Intrinsic) const override;
+
/// Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
@@ -1057,6 +1067,15 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
SDLoc dl, SelectionDAG &DAG) const override;
+ bool supportSplitCSR(MachineFunction *MF) const override {
+ return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+ MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
+ }
+ void initializeSplitCSR(MachineBasicBlock *Entry) const override;
+ void insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
+
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
bool mayBeEmittedAsTailCall(CallInst *CI) const override;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
index 0a27c33..49be648 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -188,7 +188,7 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
let isCommutable = IsCommutable in
def NAME: AVX512<O, F, Outs, Ins,
OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
- "$dst , "#IntelSrcAsm#"}",
+ "$dst, "#IntelSrcAsm#"}",
Pattern, itin>;
// Prefer over VMOV*rrk Pat<>
@@ -323,18 +323,16 @@ multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
list<dag> Pattern,
- list<dag> MaskingPattern,
- string Round = "",
- InstrItinClass itin = NoItinerary> {
+ list<dag> MaskingPattern> {
def NAME: AVX512<O, F, Outs, Ins,
- OpcodeStr#"\t{"#AttSrcAsm#", $dst "#Round#"|"#
- "$dst "#Round#", "#IntelSrcAsm#"}",
- Pattern, itin>;
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
+ "$dst, "#IntelSrcAsm#"}",
+ Pattern, NoItinerary>;
def NAME#k: AVX512<O, F, Outs, MaskingIns,
- OpcodeStr#"\t{"#Round#AttSrcAsm#", $dst {${mask}}|"#
- "$dst {${mask}}, "#IntelSrcAsm#Round#"}",
- MaskingPattern, itin>, EVEX_K;
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
+ "$dst {${mask}}, "#IntelSrcAsm#"}",
+ MaskingPattern, NoItinerary>, EVEX_K;
}
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
@@ -342,33 +340,27 @@ multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
dag Ins, dag MaskingIns,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
- dag RHS, dag MaskingRHS,
- string Round = "",
- InstrItinClass itin = NoItinerary> :
+ dag RHS, dag MaskingRHS> :
AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[(set _.KRC:$dst, RHS)],
- [(set _.KRC:$dst, MaskingRHS)],
- Round, NoItinerary>;
+ [(set _.KRC:$dst, MaskingRHS)]>;
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
- dag RHS, string Round = "",
- InstrItinClass itin = NoItinerary> :
+ dag RHS> :
AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
- (and _.KRCWM:$mask, RHS),
- Round, itin>;
+ (and _.KRCWM:$mask, RHS)>;
multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm> :
AVX512_maskable_custom_cmp<O, F, Outs,
Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
- AttSrcAsm, IntelSrcAsm,
- [],[],"", NoItinerary>;
+ AttSrcAsm, IntelSrcAsm, [],[]>;
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion
@@ -1294,7 +1286,7 @@ multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
- "\t{$src2, $src1, ${dst} |${dst}, $src1, $src2}"),
+ "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
[]>, EVEX_4V;
def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
@@ -1311,7 +1303,7 @@ multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
- "\t{$src2, $src1, ${dst} |${dst}, $src1, $src2}"),
+ "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
@@ -1426,7 +1418,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
- "{sae}, $src2, $src1", "$src1, $src2,{sae}",
+ "{sae}, $src2, $src1", "$src1, $src2, {sae}",
(OpNodeRnd (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc,
@@ -1449,7 +1441,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
- "$cc,{sae}, $src2, $src1","$src1, $src2,{sae}, $cc">,
+ "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">,
EVEX_4V, EVEX_B;
}// let isAsmParserOnly = 1, hasSideEffects = 0
@@ -1831,7 +1823,7 @@ multiclass avx512_vcmp_sae<X86VectorVTInfo _> {
defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
- "{sae}, $src2, $src1", "$src1, $src2,{sae}",
+ "{sae}, $src2, $src1", "$src1, $src2, {sae}",
(X86cmpmRnd (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc,
@@ -1842,8 +1834,8 @@ multiclass avx512_vcmp_sae<X86VectorVTInfo _> {
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
- "$cc,{sae}, $src2, $src1",
- "$src1, $src2,{sae}, $cc">, EVEX_B;
+ "$cc, {sae}, $src2, $src1",
+ "$src1, $src2, {sae}, $cc">, EVEX_B;
}
}
@@ -1889,13 +1881,13 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [prd] in {
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),//_.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst | $dst, $src1, $src2}",
+ OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2)))], NoItinerary>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#
- "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}",
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(or _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2))))], NoItinerary>, EVEX_K;
@@ -1903,14 +1895,14 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##
- "\t{$src2, $src1, $dst | $dst, $src1, $src2}",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2)))], NoItinerary>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##
- "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}",
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(or _.KRCWM:$mask,
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2))))], NoItinerary>, EVEX_K;
@@ -1925,13 +1917,13 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string mem, string broadcast>{
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst | $dst, $src1, $src2}",
+ OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2)))], NoItinerary>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#
- "\t{$src2, $src1, $dst {${mask}}| $dst {${mask}}, $src1, $src2}",
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(or _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2))))], NoItinerary>, EVEX_K;
@@ -1939,21 +1931,21 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##mem#
- "\t{$src2, $src1, $dst | $dst, $src1, $src2}",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2)))], NoItinerary>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##mem#
- "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}",
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst, (or _.KRCWM:$mask, (OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2))))], NoItinerary>, EVEX_K;
def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
- _.BroadcastStr##", $dst | $dst, ${src1}"
+ _.BroadcastStr##", $dst|$dst, ${src1}"
##_.BroadcastStr##", $src2}",
[(set _.KRC:$dst,(OpNode
(_.VT (X86VBroadcast
@@ -1962,7 +1954,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
- _.BroadcastStr##", $dst {${mask}} | $dst {${mask}}, ${src1}"##
+ _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
_.BroadcastStr##", $src2}",
[(set _.KRC:$dst,(or _.KRCWM:$mask, (OpNode
(_.VT (X86VBroadcast
@@ -2715,30 +2707,6 @@ defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 0>,
avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512>,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
-def: Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512 addr:$ptr,
- (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
- (VMOVUPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
-
-def: Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512 addr:$ptr,
- (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
- (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
-
-def: Pat<(v8f64 (int_x86_avx512_mask_load_pd_512 addr:$ptr,
- (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
- (VMOVAPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
-
-def: Pat<(v16f32 (int_x86_avx512_mask_load_ps_512 addr:$ptr,
- (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
- (VMOVAPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
-
-def: Pat<(v8f64 (int_x86_avx512_mask_load_pd_512 addr:$ptr,
- (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
- (VMOVAPDZrm addr:$ptr)>;
-
-def: Pat<(v16f32 (int_x86_avx512_mask_load_ps_512 addr:$ptr,
- (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
- (VMOVAPSZrm addr:$ptr)>;
-
def: Pat<(int_x86_avx512_mask_storeu_ps_512 addr:$ptr, (v16f32 VR512:$src),
GR16:$mask),
(VMOVUPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
@@ -4088,8 +4056,8 @@ defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>,
defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai>,
avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>, AVX512BIi8Base, EVEX_4V;
-defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", rotr>, AVX512BIi8Base, EVEX_4V;
-defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", rotl>, AVX512BIi8Base, EVEX_4V;
+defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri>, AVX512BIi8Base, EVEX_4V;
+defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli>, AVX512BIi8Base, EVEX_4V;
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>;
@@ -6057,12 +6025,12 @@ multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
let mayStore = 1 in {
def mr : AVX512XS8I<opc, MRMDestMem, (outs),
(ins x86memop:$dst, SrcInfo.RC:$src),
- OpcodeStr # "\t{$src, $dst |$dst, $src}",
+ OpcodeStr # "\t{$src, $dst|$dst, $src}",
[]>, EVEX;
def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
(ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
- OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
+ OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
[]>, EVEX, EVEX_K;
}//mayStore = 1
}
@@ -6666,12 +6634,12 @@ multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _,
let mayStore = 1 in {
def mr : AVX5128I<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.RC:$src),
- OpcodeStr # "\t{$src, $dst |$dst, $src}",
+ OpcodeStr # "\t{$src, $dst|$dst, $src}",
[]>, EVEX_CD8<_.EltSize, CD8VT1>;
def mrk : AVX5128I<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
- OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
+ OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
[(store (_.VT (vselect _.KRCWM:$mask,
(_.VT (X86compress _.RC:$src)), _.ImmAllZerosV)),
addr:$dst)]>,
@@ -6766,7 +6734,7 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix, "$src2,{sae}, $src1",
+ OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
"$src1, {sae}, $src2",
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2),
@@ -6895,8 +6863,8 @@ multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
- OpcodeStr, "$src3,{sae}, $src2, $src1",
- "$src1, $src2,{sae}, $src3",
+ OpcodeStr, "$src3, {sae}, $src2, $src1",
+ "$src1, $src2, {sae}, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 imm:$src3),
@@ -6907,8 +6875,8 @@ multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _> {
defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
- OpcodeStr, "$src3,{sae}, $src2, $src1",
- "$src1, $src2,{sae}, $src3",
+ OpcodeStr, "$src3, {sae}, $src2, $src1",
+ "$src1, $src2, {sae}, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 imm:$src3),
diff --git a/contrib/llvm/lib/Target/X86/X86InstrExtension.td b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
index c4b2d6d..af43d9f 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrExtension.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
@@ -98,22 +98,22 @@ let hasSideEffects = 0, isCodeGenOnly = 1 in {
def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
- [], IIC_MOVZX>, TB, Sched<[WriteALU]>;
+ [], IIC_MOVZX>, TB, OpSize32, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
- [], IIC_MOVZX>, TB, Sched<[WriteALULd]>;
+ [], IIC_MOVZX>, TB, OpSize32, Sched<[WriteALULd]>;
def MOVSX32_NOREXrr8 : I<0xBE, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
- [], IIC_MOVSX>, TB, Sched<[WriteALU]>;
+ [], IIC_MOVSX>, TB, OpSize32, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVSX32_NOREXrm8 : I<0xBE, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
- [], IIC_MOVSX>, TB, Sched<[WriteALULd]>;
+ [], IIC_MOVSX>, TB, OpSize32, Sched<[WriteALULd]>;
}
// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
@@ -146,18 +146,22 @@ def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
Sched<[WriteALULd]>, Requires<[In64BitMode]>;
// movzbq and movzwq encodings for the disassembler
-def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
- "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
- TB, Sched<[WriteALU]>;
-def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
- "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
- TB, Sched<[WriteALULd]>;
-def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
- "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
- TB, Sched<[WriteALU]>;
-def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
- "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
- TB, Sched<[WriteALULd]>;
+let hasSideEffects = 0 in {
+def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
+ "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
+ TB, Sched<[WriteALU]>;
+let mayLoad = 1 in
+def MOVZX64rm8 : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
+ "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
+ TB, Sched<[WriteALULd]>;
+def MOVZX64rr16 : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
+ TB, Sched<[WriteALU]>;
+let mayLoad = 1 in
+def MOVZX64rm16 : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
+ TB, Sched<[WriteALULd]>;
+}
// 64-bit zero-extension patterns use SUBREG_TO_REG and an operation writing a
// 32-bit register.
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 829cedd..6432863 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -225,6 +225,9 @@ def X86vshli : SDNode<"X86ISD::VSHLI", SDTIntShiftOp>;
def X86vsrli : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>;
def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>;
+def X86vrotli : SDNode<"X86ISD::VROTLI", SDTIntShiftOp>;
+def X86vrotri : SDNode<"X86ISD::VROTRI", SDTIntShiftOp>;
+
def X86vprot : SDNode<"X86ISD::VPROT",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
index ea8e562..9c8339a 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -1273,7 +1273,7 @@ def STOSW : I<0xAB, RawFrmDst, (outs dstidx16:$dst), (ins),
let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in
def STOSL : I<0xAB, RawFrmDst, (outs dstidx32:$dst), (ins),
"stos{l|d}\t{%eax, $dst|$dst, eax}", [], IIC_STOS>, OpSize32;
-let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
+let Defs = [RDI], Uses = [RAX,RDI,EFLAGS] in
def STOSQ : RI<0xAB, RawFrmDst, (outs dstidx64:$dst), (ins),
"stosq\t{%rax, $dst|$dst, rax}", [], IIC_STOS>;
@@ -2755,56 +2755,56 @@ def : InstAlias<"aam", (AAM8i8 10)>, Requires<[Not64BitMode]>;
// Disambiguate the mem/imm form of bt-without-a-suffix as btl.
// Likewise for btc/btr/bts.
-def : InstAlias<"bt {$imm, $mem|$mem, $imm}",
+def : InstAlias<"bt\t{$imm, $mem|$mem, $imm}",
(BT32mi8 i32mem:$mem, i32i8imm:$imm), 0>;
-def : InstAlias<"btc {$imm, $mem|$mem, $imm}",
+def : InstAlias<"btc\t{$imm, $mem|$mem, $imm}",
(BTC32mi8 i32mem:$mem, i32i8imm:$imm), 0>;
-def : InstAlias<"btr {$imm, $mem|$mem, $imm}",
+def : InstAlias<"btr\t{$imm, $mem|$mem, $imm}",
(BTR32mi8 i32mem:$mem, i32i8imm:$imm), 0>;
-def : InstAlias<"bts {$imm, $mem|$mem, $imm}",
+def : InstAlias<"bts\t{$imm, $mem|$mem, $imm}",
(BTS32mi8 i32mem:$mem, i32i8imm:$imm), 0>;
// clr aliases.
-def : InstAlias<"clrb $reg", (XOR8rr GR8 :$reg, GR8 :$reg), 0>;
-def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg), 0>;
-def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg), 0>;
-def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg), 0>;
+def : InstAlias<"clrb\t$reg", (XOR8rr GR8 :$reg, GR8 :$reg), 0>;
+def : InstAlias<"clrw\t$reg", (XOR16rr GR16:$reg, GR16:$reg), 0>;
+def : InstAlias<"clrl\t$reg", (XOR32rr GR32:$reg, GR32:$reg), 0>;
+def : InstAlias<"clrq\t$reg", (XOR64rr GR64:$reg, GR64:$reg), 0>;
// lods aliases. Accept the destination being omitted because it's implicit
// in the mnemonic, or the mnemonic suffix being omitted because it's implicit
// in the destination.
-def : InstAlias<"lodsb $src", (LODSB srcidx8:$src), 0>;
-def : InstAlias<"lodsw $src", (LODSW srcidx16:$src), 0>;
-def : InstAlias<"lods{l|d} $src", (LODSL srcidx32:$src), 0>;
-def : InstAlias<"lodsq $src", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"lods {$src, %al|al, $src}", (LODSB srcidx8:$src), 0>;
-def : InstAlias<"lods {$src, %ax|ax, $src}", (LODSW srcidx16:$src), 0>;
-def : InstAlias<"lods {$src, %eax|eax, $src}", (LODSL srcidx32:$src), 0>;
-def : InstAlias<"lods {$src, %rax|rax, $src}", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"lodsb\t$src", (LODSB srcidx8:$src), 0>;
+def : InstAlias<"lodsw\t$src", (LODSW srcidx16:$src), 0>;
+def : InstAlias<"lods{l|d}\t$src", (LODSL srcidx32:$src), 0>;
+def : InstAlias<"lodsq\t$src", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"lods\t{$src, %al|al, $src}", (LODSB srcidx8:$src), 0>;
+def : InstAlias<"lods\t{$src, %ax|ax, $src}", (LODSW srcidx16:$src), 0>;
+def : InstAlias<"lods\t{$src, %eax|eax, $src}", (LODSL srcidx32:$src), 0>;
+def : InstAlias<"lods\t{$src, %rax|rax, $src}", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>;
// stos aliases. Accept the source being omitted because it's implicit in
// the mnemonic, or the mnemonic suffix being omitted because it's implicit
// in the source.
-def : InstAlias<"stosb $dst", (STOSB dstidx8:$dst), 0>;
-def : InstAlias<"stosw $dst", (STOSW dstidx16:$dst), 0>;
-def : InstAlias<"stos{l|d} $dst", (STOSL dstidx32:$dst), 0>;
-def : InstAlias<"stosq $dst", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"stos {%al, $dst|$dst, al}", (STOSB dstidx8:$dst), 0>;
-def : InstAlias<"stos {%ax, $dst|$dst, ax}", (STOSW dstidx16:$dst), 0>;
-def : InstAlias<"stos {%eax, $dst|$dst, eax}", (STOSL dstidx32:$dst), 0>;
-def : InstAlias<"stos {%rax, $dst|$dst, rax}", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"stosb\t$dst", (STOSB dstidx8:$dst), 0>;
+def : InstAlias<"stosw\t$dst", (STOSW dstidx16:$dst), 0>;
+def : InstAlias<"stos{l|d}\t$dst", (STOSL dstidx32:$dst), 0>;
+def : InstAlias<"stosq\t$dst", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"stos\t{%al, $dst|$dst, al}", (STOSB dstidx8:$dst), 0>;
+def : InstAlias<"stos\t{%ax, $dst|$dst, ax}", (STOSW dstidx16:$dst), 0>;
+def : InstAlias<"stos\t{%eax, $dst|$dst, eax}", (STOSL dstidx32:$dst), 0>;
+def : InstAlias<"stos\t{%rax, $dst|$dst, rax}", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
// scas aliases. Accept the destination being omitted because it's implicit
// in the mnemonic, or the mnemonic suffix being omitted because it's implicit
// in the destination.
-def : InstAlias<"scasb $dst", (SCASB dstidx8:$dst), 0>;
-def : InstAlias<"scasw $dst", (SCASW dstidx16:$dst), 0>;
-def : InstAlias<"scas{l|d} $dst", (SCASL dstidx32:$dst), 0>;
-def : InstAlias<"scasq $dst", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"scas {$dst, %al|al, $dst}", (SCASB dstidx8:$dst), 0>;
-def : InstAlias<"scas {$dst, %ax|ax, $dst}", (SCASW dstidx16:$dst), 0>;
-def : InstAlias<"scas {$dst, %eax|eax, $dst}", (SCASL dstidx32:$dst), 0>;
-def : InstAlias<"scas {$dst, %rax|rax, $dst}", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"scasb\t$dst", (SCASB dstidx8:$dst), 0>;
+def : InstAlias<"scasw\t$dst", (SCASW dstidx16:$dst), 0>;
+def : InstAlias<"scas{l|d}\t$dst", (SCASL dstidx32:$dst), 0>;
+def : InstAlias<"scasq\t$dst", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"scas\t{$dst, %al|al, $dst}", (SCASB dstidx8:$dst), 0>;
+def : InstAlias<"scas\t{$dst, %ax|ax, $dst}", (SCASW dstidx16:$dst), 0>;
+def : InstAlias<"scas\t{$dst, %eax|eax, $dst}", (SCASL dstidx32:$dst), 0>;
+def : InstAlias<"scas\t{$dst, %rax|rax, $dst}", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
// div and idiv aliases for explicit A register.
def : InstAlias<"div{b}\t{$src, %al|al, $src}", (DIV8r GR8 :$src)>;
@@ -2892,30 +2892,30 @@ def : InstAlias<"fnstsw" , (FNSTSW16r)>;
// lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but
// this is compatible with what GAS does.
-def : InstAlias<"lcall $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>;
-def : InstAlias<"ljmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>;
-def : InstAlias<"lcall {*}$dst", (FARCALL32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>;
-def : InstAlias<"ljmp {*}$dst", (FARJMP32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>;
-def : InstAlias<"lcall $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>;
-def : InstAlias<"ljmp $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>;
-def : InstAlias<"lcall {*}$dst", (FARCALL16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>;
-def : InstAlias<"ljmp {*}$dst", (FARJMP16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>;
-
-def : InstAlias<"call {*}$dst", (CALL64m i64mem:$dst), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"jmp {*}$dst", (JMP64m i64mem:$dst), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"call {*}$dst", (CALL32m i32mem:$dst), 0>, Requires<[In32BitMode]>;
-def : InstAlias<"jmp {*}$dst", (JMP32m i32mem:$dst), 0>, Requires<[In32BitMode]>;
-def : InstAlias<"call {*}$dst", (CALL16m i16mem:$dst), 0>, Requires<[In16BitMode]>;
-def : InstAlias<"jmp {*}$dst", (JMP16m i16mem:$dst), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"lcall\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>;
+def : InstAlias<"ljmp\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>;
+def : InstAlias<"lcall\t{*}$dst", (FARCALL32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>;
+def : InstAlias<"ljmp\t{*}$dst", (FARJMP32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>;
+def : InstAlias<"lcall\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"ljmp\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"lcall\t{*}$dst", (FARCALL16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"ljmp\t{*}$dst", (FARJMP16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>;
+
+def : InstAlias<"call\t{*}$dst", (CALL64m i64mem:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"jmp\t{*}$dst", (JMP64m i64mem:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"call\t{*}$dst", (CALL32m i32mem:$dst), 0>, Requires<[In32BitMode]>;
+def : InstAlias<"jmp\t{*}$dst", (JMP32m i32mem:$dst), 0>, Requires<[In32BitMode]>;
+def : InstAlias<"call\t{*}$dst", (CALL16m i16mem:$dst), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"jmp\t{*}$dst", (JMP16m i16mem:$dst), 0>, Requires<[In16BitMode]>;
// "imul <imm>, B" is an alias for "imul <imm>, B, B".
-def : InstAlias<"imul{w} {$imm, $r|$r, $imm}", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm), 0>;
-def : InstAlias<"imul{w} {$imm, $r|$r, $imm}", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm), 0>;
-def : InstAlias<"imul{l} {$imm, $r|$r, $imm}", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm), 0>;
-def : InstAlias<"imul{l} {$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm), 0>;
-def : InstAlias<"imul{q} {$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>;
-def : InstAlias<"imul{q} {$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>;
+def : InstAlias<"imul{w}\t{$imm, $r|$r, $imm}", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm), 0>;
+def : InstAlias<"imul{w}\t{$imm, $r|$r, $imm}", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm), 0>;
+def : InstAlias<"imul{l}\t{$imm, $r|$r, $imm}", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm), 0>;
+def : InstAlias<"imul{l}\t{$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm), 0>;
+def : InstAlias<"imul{q}\t{$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>;
+def : InstAlias<"imul{q}\t{$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>;
// inb %dx -> inb %al, %dx
def : InstAlias<"inb\t{%dx|dx}", (IN8rr), 0>;
@@ -2927,46 +2927,46 @@ def : InstAlias<"inl\t$port", (IN32ri u8imm:$port), 0>;
// jmp and call aliases for lcall and ljmp. jmp $42,$5 -> ljmp
-def : InstAlias<"call $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>;
-def : InstAlias<"jmp $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>;
-def : InstAlias<"call $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>, Requires<[Not16BitMode]>;
-def : InstAlias<"jmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>, Requires<[Not16BitMode]>;
-def : InstAlias<"callw $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>;
-def : InstAlias<"jmpw $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>;
-def : InstAlias<"calll $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
-def : InstAlias<"jmpl $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"call\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>;
+def : InstAlias<"jmp\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>;
+def : InstAlias<"call\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>, Requires<[Not16BitMode]>;
+def : InstAlias<"jmp\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>, Requires<[Not16BitMode]>;
+def : InstAlias<"callw\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmpw\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>;
+def : InstAlias<"calll\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmpl\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
// Force mov without a suffix with a segment and mem to prefer the 'l' form of
// the move. All segment/mem forms are equivalent, this has the shortest
// encoding.
-def : InstAlias<"mov {$mem, $seg|$seg, $mem}", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem), 0>;
-def : InstAlias<"mov {$seg, $mem|$mem, $seg}", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg), 0>;
+def : InstAlias<"mov\t{$mem, $seg|$seg, $mem}", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem), 0>;
+def : InstAlias<"mov\t{$seg, $mem|$mem, $seg}", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg), 0>;
// Match 'movq <largeimm>, <reg>' as an alias for movabsq.
-def : InstAlias<"movq {$imm, $reg|$reg, $imm}", (MOV64ri GR64:$reg, i64imm:$imm), 0>;
+def : InstAlias<"movq\t{$imm, $reg|$reg, $imm}", (MOV64ri GR64:$reg, i64imm:$imm), 0>;
// Match 'movq GR64, MMX' as an alias for movd.
-def : InstAlias<"movq {$src, $dst|$dst, $src}",
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
(MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>;
-def : InstAlias<"movq {$src, $dst|$dst, $src}",
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
(MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>;
// movsx aliases
-def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX16rr8 GR16:$dst, GR8:$src), 0>;
-def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0>;
-def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>;
-def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>;
-def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>;
-def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>;
-def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>;
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX16rr8 GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>;
// movzx aliases
-def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX16rr8 GR16:$dst, GR8:$src), 0>;
-def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0>;
-def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>;
-def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>;
-def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX64rr8_Q GR64:$dst, GR8:$src), 0>;
-def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX64rr16_Q GR64:$dst, GR16:$src), 0>;
+def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX16rr8 GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX64rr8 GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX64rr16 GR64:$dst, GR16:$src), 0>;
// Note: No GR32->GR64 movzx form.
// outb %dx -> outb %al, %dx
diff --git a/contrib/llvm/lib/Target/X86/X86InstrMPX.td b/contrib/llvm/lib/Target/X86/X86InstrMPX.td
index 31608cd..71ab973 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrMPX.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrMPX.td
@@ -15,10 +15,10 @@
multiclass mpx_bound_make<bits<8> opc, string OpcodeStr> {
def 32rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins i32mem:$src),
- OpcodeStr#" \t{$src, $dst|$dst, $src}", []>,
+ OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
Requires<[HasMPX, Not64BitMode]>;
def 64rm: RI<opc, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
- OpcodeStr#" \t{$src, $dst|$dst, $src}", []>,
+ OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
Requires<[HasMPX, In64BitMode]>;
}
@@ -26,16 +26,16 @@ defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS;
multiclass mpx_bound_check<bits<8> opc, string OpcodeStr> {
def 32rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, i32mem:$src2),
- OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>,
+ OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, Not64BitMode]>;
def 64rm: RI<opc, MRMSrcMem, (outs), (ins BNDR:$src1, i64mem:$src2),
- OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>,
+ OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, In64BitMode]>;
def 32rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR32:$src2),
- OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>,
+ OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, Not64BitMode]>;
def 64rr: RI<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR64:$src2),
- OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>,
+ OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, In64BitMode]>;
}
defm BNDCL : mpx_bound_check<0x1A, "bndcl">, XS;
@@ -43,28 +43,28 @@ defm BNDCU : mpx_bound_check<0x1A, "bndcu">, XD;
defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD;
def BNDMOVRMrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src),
- "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX]>;
def BNDMOVRM32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
- "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, Not64BitMode]>;
def BNDMOVRM64rm : RI<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src),
- "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, In64BitMode]>;
def BNDMOVMRrr : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src),
- "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX]>;
def BNDMOVMR32mr : I<0x1B, MRMDestMem, (outs i64mem:$dst), (ins BNDR:$src),
- "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, Not64BitMode]>;
def BNDMOVMR64mr : RI<0x1B, MRMDestMem, (outs i128mem:$dst), (ins BNDR:$src),
- "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, In64BitMode]>;
def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src),
- "bndstx \t{$src, $dst|$dst, $src}", []>, PS,
+ "bndstx\t{$src, $dst|$dst, $src}", []>, PS,
Requires<[HasMPX]>;
def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
- "bndldx \t{$src, $dst|$dst, $src}", []>, PS,
+ "bndldx\t{$src, $dst|$dst, $src}", []>, PS,
Requires<[HasMPX]>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
index 624b931..6a7c456 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1808,7 +1808,7 @@ def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
(CVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
- (CVTSD2SI64rm GR64:$dst, sdmem:$src)>;
+ (CVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;
/// SSE 2 Only
@@ -7838,9 +7838,7 @@ class avx_broadcast_rm<bits<8> opc, string OpcodeStr, RegisterClass RC,
AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (VT (X86VBroadcast (ld_frag addr:$src))))]>,
- Sched<[Sched]>, VEX {
- let mayLoad = 1;
-}
+ Sched<[Sched]>, VEX;
// AVX2 adds register forms
class avx2_broadcast_rr<bits<8> opc, string OpcodeStr, RegisterClass RC,
@@ -7871,7 +7869,7 @@ let ExeDomain = SSEPackedDouble in
def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256,
v4f64, v2f64, WriteFShuffle256>, VEX_L;
-let mayLoad = 1, Predicates = [HasAVX2] in
+let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX2] in
def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
(ins i128mem:$src),
"vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
@@ -8259,6 +8257,9 @@ let Predicates = [HasF16C] in {
(VCVTPH2PSrm addr:$src)>;
def : Pat<(int_x86_vcvtph2ps_128 (vzload_v2i64 addr:$src)),
(VCVTPH2PSrm addr:$src)>;
+ def : Pat<(int_x86_vcvtph2ps_128 (bitconvert
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (VCVTPH2PSrm addr:$src)>;
def : Pat<(store (f64 (extractelt (bc_v2f64 (v8i16
(int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))),
diff --git a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 646b556..b525d5e 100644
--- a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -29,7 +29,7 @@ enum IntrinsicType {
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM,
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
- EXPAND_FROM_MEM, BLEND, INSERT_SUBVEC,
+ EXPAND_FROM_MEM, LOADA, LOADU, BLEND, INSERT_SUBVEC,
TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
};
@@ -143,6 +143,18 @@ static const IntrinsicData IntrinsicsWithChain[] = {
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_load_pd_128, LOADA, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_load_pd_256, LOADA, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_load_pd_512, LOADA, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_load_ps_128, LOADA, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_load_ps_256, LOADA, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_load_ps_512, LOADA, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_loadu_pd_128, LOADU, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_loadu_pd_256, LOADU, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_loadu_pd_512, LOADU, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_loadu_ps_128, LOADU, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_loadu_ps_256, LOADU, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_loadu_ps_512, LOADU, ISD::DELETED_NODE, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8,
@@ -1129,6 +1141,42 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxb_d_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxb_d_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxb_d_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxb_q_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxb_q_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxb_q_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxb_w_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxb_w_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxb_w_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxd_q_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxd_q_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxd_q_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxw_d_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxw_d_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxw_d_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxw_q_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxw_q_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxw_q_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_db_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_db_256, INTR_TYPE_1OP_MASK,
@@ -1165,6 +1213,42 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxb_d_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxb_d_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxb_d_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxb_q_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxb_q_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxb_q_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxb_w_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxb_w_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxb_w_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxd_q_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxd_q_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxd_q_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxw_d_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxw_d_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxw_d_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxw_q_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxw_q_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxw_q_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_128, INTR_TYPE_2OP_MASK,
X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_256, INTR_TYPE_2OP_MASK,
@@ -1201,12 +1285,54 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_por_q_128, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_por_q_256, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prol_d_128, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prol_d_256, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prol_d_512, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prol_q_128, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prol_q_256, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prol_q_512, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prolv_d_128, INTR_TYPE_2OP_MASK, ISD::ROTL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prolv_d_256, INTR_TYPE_2OP_MASK, ISD::ROTL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prolv_d_512, INTR_TYPE_2OP_MASK, ISD::ROTL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prolv_q_128, INTR_TYPE_2OP_MASK, ISD::ROTL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prolv_q_256, INTR_TYPE_2OP_MASK, ISD::ROTL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prolv_q_512, INTR_TYPE_2OP_MASK, ISD::ROTL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pror_d_128, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pror_d_256, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pror_d_512, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pror_q_128, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pror_q_256, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pror_q_512, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prorv_d_128, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prorv_d_256, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prorv_d_512, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prorv_q_128, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prorv_q_256, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prorv_q_512, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_128, INTR_TYPE_2OP_MASK,
X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_256, INTR_TYPE_2OP_MASK,
X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK,
X86ISD::PSHUFB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshuf_d_128, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshuf_d_256, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshuf_d_512, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshufh_w_128, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFHW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshufh_w_256, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFHW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshufh_w_512, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFHW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshufl_w_128, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFLW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshufl_w_256, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFLW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshufl_w_512, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFLW, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
@@ -1219,8 +1345,21 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_psll_qi_128, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_qi_256, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_qi_512, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_w_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_w_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_w_512, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_wi_128, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_wi_256, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_wi_512, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_mask_pslli_q, VSHIFT_MASK, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psllv16_hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psllv2_di, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psllv32hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psllv4_di, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psllv4_si, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psllv8_hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psllv8_si, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psllv_d, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psllv_q, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psra_d, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
@@ -1243,8 +1382,15 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_psra_wi_512, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrai_d, VSHIFT_MASK, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrai_q, VSHIFT_MASK, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav16_hi, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav32_hi, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav4_si, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav8_hi, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav8_si, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
X86_INTRINSIC_DATA(avx512_mask_psrav_d, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
X86_INTRINSIC_DATA(avx512_mask_psrav_q, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav_q_128, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav_q_256, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_d, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
diff --git a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index 3a7a98d..00515dd 100644
--- a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -92,6 +92,10 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// used to address arguments in a function using a base pointer.
int SEHFramePtrSaveIndex = 0;
+ /// True if this function has a subset of CSRs that is handled explicitly via
+ /// copies.
+ bool IsSplitCSR = false;
+
private:
/// ForwardedMustTailRegParms - A list of virtual and physical registers
/// that must be forwarded to every musttail call.
@@ -160,6 +164,9 @@ public:
SmallVectorImpl<ForwardedRegister> &getForwardedMustTailRegParms() {
return ForwardedMustTailRegParms;
}
+
+ bool isSplitCSR() const { return IsSplitCSR; }
+ void setIsSplitCSR(bool s) { IsSplitCSR = s; }
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
index 58020d9..45cc0ae 100644
--- a/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -9,8 +9,10 @@
//
// This file defines the pass that performs some optimizations with LEA
// instructions in order to improve code size.
-// Currently, it does one thing:
-// 1) Address calculations in load and store instructions are replaced by
+// Currently, it does two things:
+// 1) If there are two LEA instructions calculating addresses which only differ
+// by displacement inside a basic block, one of them is removed.
+// 2) Address calculations in load and store instructions are replaced by
// existing LEA def registers where possible.
//
//===----------------------------------------------------------------------===//
@@ -38,6 +40,7 @@ static cl::opt<bool> EnableX86LEAOpt("enable-x86-lea-opt", cl::Hidden,
cl::init(false));
STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions");
+STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed");
namespace {
class OptimizeLEAPass : public MachineFunctionPass {
@@ -71,6 +74,13 @@ private:
/// \brief Returns true if the instruction is LEA.
bool isLEA(const MachineInstr &MI);
+ /// \brief Returns true if the \p Last LEA instruction can be replaced by the
+ /// \p First. The difference between displacements of the addresses calculated
+ /// by these LEAs is returned in \p AddrDispShift. It'll be used for proper
+ /// replacement of the \p Last LEA's uses with the \p First's def register.
+ bool isReplaceable(const MachineInstr &First, const MachineInstr &Last,
+ int64_t &AddrDispShift);
+
/// \brief Returns true if two instructions have memory operands that only
/// differ by displacement. The numbers of the first memory operands for both
/// instructions are specified through \p N1 and \p N2. The address
@@ -79,13 +89,20 @@ private:
const MachineInstr &MI2, unsigned N2,
int64_t &AddrDispShift);
- /// \brief Find all LEA instructions in the basic block.
+ /// \brief Find all LEA instructions in the basic block. Also, assign position
+ /// numbers to all instructions in the basic block to speed up calculation of
+ /// distance between them.
void findLEAs(const MachineBasicBlock &MBB,
SmallVectorImpl<MachineInstr *> &List);
/// \brief Removes redundant address calculations.
bool removeRedundantAddrCalc(const SmallVectorImpl<MachineInstr *> &List);
+ /// \brief Removes LEAs which calculate similar addresses.
+ bool removeRedundantLEAs(SmallVectorImpl<MachineInstr *> &List);
+
+ DenseMap<const MachineInstr *, unsigned> InstrPos;
+
MachineRegisterInfo *MRI;
const X86InstrInfo *TII;
const X86RegisterInfo *TRI;
@@ -99,14 +116,15 @@ FunctionPass *llvm::createX86OptimizeLEAs() { return new OptimizeLEAPass(); }
int OptimizeLEAPass::calcInstrDist(const MachineInstr &First,
const MachineInstr &Last) {
- const MachineBasicBlock *MBB = First.getParent();
-
- // Both instructions must be in the same basic block.
- assert(Last.getParent() == MBB &&
+ // Both instructions must be in the same basic block and they must be
+ // presented in InstrPos.
+ assert(Last.getParent() == First.getParent() &&
"Instructions are in different basic blocks");
+ assert(InstrPos.find(&First) != InstrPos.end() &&
+ InstrPos.find(&Last) != InstrPos.end() &&
+ "Instructions' positions are undefined");
- return std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&Last)) -
- std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&First));
+ return InstrPos[&Last] - InstrPos[&First];
}
// Find the best LEA instruction in the List to replace address recalculation in
@@ -189,6 +207,69 @@ bool OptimizeLEAPass::isLEA(const MachineInstr &MI) {
Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
}
+// Check that the Last LEA can be replaced by the First LEA. To be so,
+// these requirements must be met:
+// 1) Addresses calculated by LEAs differ only by displacement.
+// 2) Def registers of LEAs belong to the same class.
+// 3) All uses of the Last LEA def register are replaceable, thus the
+// register is used only as address base.
+bool OptimizeLEAPass::isReplaceable(const MachineInstr &First,
+ const MachineInstr &Last,
+ int64_t &AddrDispShift) {
+ assert(isLEA(First) && isLEA(Last) &&
+ "The function works only with LEA instructions");
+
+ // Compare instructions' memory operands.
+ if (!isSimilarMemOp(Last, 1, First, 1, AddrDispShift))
+ return false;
+
+ // Make sure that LEA def registers belong to the same class. There may be
+ // instructions (like MOV8mr_NOREX) which allow a limited set of registers to
+ // be used as their operands, so we must be sure that replacing one LEA
+ // with another won't lead to putting a wrong register in the instruction.
+ if (MRI->getRegClass(First.getOperand(0).getReg()) !=
+ MRI->getRegClass(Last.getOperand(0).getReg()))
+ return false;
+
+ // Loop over all uses of the Last LEA to check that its def register is
+ // used only as address base for memory accesses. If so, it can be
+ // replaced, otherwise - no.
+ for (auto &MO : MRI->use_operands(Last.getOperand(0).getReg())) {
+ MachineInstr &MI = *MO.getParent();
+
+ // Get the number of the first memory operand.
+ const MCInstrDesc &Desc = MI.getDesc();
+ int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode());
+
+ // If the use instruction has no memory operand - the LEA is not
+ // replaceable.
+ if (MemOpNo < 0)
+ return false;
+
+ MemOpNo += X86II::getOperandBias(Desc);
+
+ // If the address base of the use instruction is not the LEA def register -
+ // the LEA is not replaceable.
+ if (!isIdenticalOp(MI.getOperand(MemOpNo + X86::AddrBaseReg), MO))
+ return false;
+
+ // If the LEA def register is used as any other operand of the use
+ // instruction - the LEA is not replaceable.
+ for (unsigned i = 0; i < MI.getNumOperands(); i++)
+ if (i != (unsigned)(MemOpNo + X86::AddrBaseReg) &&
+ isIdenticalOp(MI.getOperand(i), MO))
+ return false;
+
+ // Check that the new address displacement will fit 4 bytes.
+ if (MI.getOperand(MemOpNo + X86::AddrDisp).isImm() &&
+ !isInt<32>(MI.getOperand(MemOpNo + X86::AddrDisp).getImm() +
+ AddrDispShift))
+ return false;
+ }
+
+ return true;
+}
+
// Check if MI1 and MI2 have memory operands which represent addresses that
// differ only by displacement.
bool OptimizeLEAPass::isSimilarMemOp(const MachineInstr &MI1, unsigned N1,
@@ -219,7 +300,15 @@ bool OptimizeLEAPass::isSimilarMemOp(const MachineInstr &MI1, unsigned N1,
void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB,
SmallVectorImpl<MachineInstr *> &List) {
+ unsigned Pos = 0;
for (auto &MI : MBB) {
+ // Assign the position number to the instruction. Note that we are going to
+ // move some instructions during the optimization however there will never
+ // be a need to move two instructions before any selected instruction. So to
+ // avoid multiple positions' updates during moves we just increase position
+ // counter by two leaving a free space for instructions which will be moved.
+ InstrPos[&MI] = Pos += 2;
+
if (isLEA(MI))
List.push_back(const_cast<MachineInstr *>(&MI));
}
@@ -270,6 +359,13 @@ bool OptimizeLEAPass::removeRedundantAddrCalc(
if (Dist < 0) {
DefMI->removeFromParent();
MBB->insert(MachineBasicBlock::iterator(&MI), DefMI);
+ InstrPos[DefMI] = InstrPos[&MI] - 1;
+
+ // Make sure the instructions' position numbers are sane.
+ assert(((InstrPos[DefMI] == 1 && DefMI == MBB->begin()) ||
+ InstrPos[DefMI] >
+ InstrPos[std::prev(MachineBasicBlock::iterator(DefMI))]) &&
+ "Instruction positioning is broken");
}
// Since we can possibly extend register lifetime, clear kill flags.
@@ -296,6 +392,81 @@ bool OptimizeLEAPass::removeRedundantAddrCalc(
return Changed;
}
+// Try to find similar LEAs in the list and replace one with another.
+bool
+OptimizeLEAPass::removeRedundantLEAs(SmallVectorImpl<MachineInstr *> &List) {
+ bool Changed = false;
+
+ // Loop over all LEA pairs.
+ auto I1 = List.begin();
+ while (I1 != List.end()) {
+ MachineInstr &First = **I1;
+ auto I2 = std::next(I1);
+ while (I2 != List.end()) {
+ MachineInstr &Last = **I2;
+ int64_t AddrDispShift;
+
+ // LEAs should be in occurence order in the list, so we can freely
+ // replace later LEAs with earlier ones.
+ assert(calcInstrDist(First, Last) > 0 &&
+ "LEAs must be in occurence order in the list");
+
+ // Check that the Last LEA instruction can be replaced by the First.
+ if (!isReplaceable(First, Last, AddrDispShift)) {
+ ++I2;
+ continue;
+ }
+
+ // Loop over all uses of the Last LEA and update their operands. Note that
+ // the correctness of this has already been checked in the isReplaceable
+ // function.
+ for (auto UI = MRI->use_begin(Last.getOperand(0).getReg()),
+ UE = MRI->use_end();
+ UI != UE;) {
+ MachineOperand &MO = *UI++;
+ MachineInstr &MI = *MO.getParent();
+
+ // Get the number of the first memory operand.
+ const MCInstrDesc &Desc = MI.getDesc();
+ int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode()) +
+ X86II::getOperandBias(Desc);
+
+ // Update address base.
+ MO.setReg(First.getOperand(0).getReg());
+
+ // Update address disp.
+ MachineOperand *Op = &MI.getOperand(MemOpNo + X86::AddrDisp);
+ if (Op->isImm())
+ Op->setImm(Op->getImm() + AddrDispShift);
+ else if (Op->isGlobal())
+ Op->setOffset(Op->getOffset() + AddrDispShift);
+ else
+ llvm_unreachable("Invalid address displacement operand");
+ }
+
+ // Since we can possibly extend register lifetime, clear kill flags.
+ MRI->clearKillFlags(First.getOperand(0).getReg());
+
+ ++NumRedundantLEAs;
+ DEBUG(dbgs() << "OptimizeLEAs: Remove redundant LEA: "; Last.dump(););
+
+ // By this moment, all of the Last LEA's uses must be replaced. So we can
+ // freely remove it.
+ assert(MRI->use_empty(Last.getOperand(0).getReg()) &&
+ "The LEA's def register must have no uses");
+ Last.eraseFromParent();
+
+ // Erase removed LEA from the list.
+ I2 = List.erase(I2);
+
+ Changed = true;
+ }
+ ++I1;
+ }
+
+ return Changed;
+}
+
bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
@@ -310,6 +481,7 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
// Process all basic blocks.
for (auto &MBB : MF) {
SmallVector<MachineInstr *, 16> LEAs;
+ InstrPos.clear();
// Find all LEA instructions in basic block.
findLEAs(MBB, LEAs);
@@ -318,6 +490,11 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
if (LEAs.empty())
continue;
+ // Remove redundant LEA instructions. The optimization may have a negative
+ // effect on performance, so do it only for -Oz.
+ if (MF.getFunction()->optForMinSize())
+ Changed |= removeRedundantLEAs(LEAs);
+
// Remove redundant address calculations.
Changed |= removeRedundantAddrCalc(LEAs);
}
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 5840443..274b566 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -250,7 +250,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_64_RT_AllRegs_SaveList;
case CallingConv::CXX_FAST_TLS:
if (Is64Bit)
- return CSR_64_TLS_Darwin_SaveList;
+ return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
+ CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
break;
case CallingConv::Intel_OCL_BI: {
if (HasAVX512 && IsWin64)
@@ -305,6 +306,15 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_32_SaveList;
}
+const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
+ const MachineFunction *MF) const {
+ assert(MF && "Invalid MachineFunction pointer.");
+ if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+ MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
+ return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
+ return nullptr;
+}
+
const uint32_t *
X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
index f014c8f..8d0094c 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -99,6 +99,8 @@ public:
/// callee-save registers on this target.
const MCPhysReg *
getCalleeSavedRegs(const MachineFunction* MF) const override;
+ const MCPhysReg *
+ getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const override;
diff --git a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index 816291d..6df0447 100644
--- a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -22,7 +22,7 @@ static cl::list<std::string>
ForceAttributes("force-attribute", cl::Hidden,
cl::desc("Add an attribute to a function. This should be a "
"pair of 'function-name:attribute-name', for "
- "example -force-add-attribute=foo:noinline. This "
+ "example -force-attribute=foo:noinline. This "
"option can be specified multiple times."));
static Attribute::AttrKind parseAttrKind(StringRef Kind) {
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 6dcfb3f..527fdd1 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -6,16 +6,11 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file implements a simple interprocedural pass which walks the
-// call-graph, looking for functions which do not access or only read
-// non-local memory, and marking them readnone/readonly. It does the
-// same with function arguments independently, marking them readonly/
-// readnone/nocapture. Finally, well-known library call declarations
-// are marked with all attributes that are consistent with the
-// function's standard definition. This pass is implemented as a
-// bottom-up traversal of the call-graph.
-//
+///
+/// \file
+/// This file implements interprocedural passes which walk the
+/// call-graph deducing and/or propagating function attributes.
+///
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO.h"
@@ -57,19 +52,14 @@ typedef SmallSetVector<Function *, 8> SCCNodeSet;
}
namespace {
-struct FunctionAttrs : public CallGraphSCCPass {
+struct PostOrderFunctionAttrs : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
- FunctionAttrs() : CallGraphSCCPass(ID) {
- initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
+ PostOrderFunctionAttrs() : CallGraphSCCPass(ID) {
+ initializePostOrderFunctionAttrsPass(*PassRegistry::getPassRegistry());
}
bool runOnSCC(CallGraphSCC &SCC) override;
- bool doInitialization(CallGraph &CG) override {
- Revisit.clear();
- return false;
- }
- bool doFinalization(CallGraph &CG) override;
-
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AssumptionCacheTracker>();
@@ -79,20 +69,19 @@ struct FunctionAttrs : public CallGraphSCCPass {
private:
TargetLibraryInfo *TLI;
- SmallVector<WeakVH,16> Revisit;
};
}
-char FunctionAttrs::ID = 0;
-INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
+char PostOrderFunctionAttrs::ID = 0;
+INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
+INITIALIZE_PASS_END(PostOrderFunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
-Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
+Pass *llvm::createPostOrderFunctionAttrsPass() { return new PostOrderFunctionAttrs(); }
namespace {
/// The three kinds of memory access relevant to 'readonly' and
@@ -949,8 +938,7 @@ static bool setDoesNotRecurse(Function &F) {
return true;
}
-static bool addNoRecurseAttrs(const CallGraphSCC &SCC,
- SmallVectorImpl<WeakVH> &Revisit) {
+static bool addNoRecurseAttrs(const CallGraphSCC &SCC) {
// Try and identify functions that do not recurse.
// If the SCC contains multiple nodes we know for sure there is recursion.
@@ -973,32 +961,11 @@ static bool addNoRecurseAttrs(const CallGraphSCC &SCC,
// Function calls a potentially recursive function.
return setDoesNotRecurse(*F);
- // We know that F is not obviously recursive, but we haven't been able to
- // prove that it doesn't actually recurse. Add it to the Revisit list to try
- // again top-down later.
- Revisit.push_back(F);
+ // Nothing else we can deduce usefully during the postorder traversal.
return false;
}
-static bool addNoRecurseAttrsTopDownOnly(Function *F) {
- // If F is internal and all uses are in norecurse functions, then F is also
- // norecurse.
- if (F->doesNotRecurse())
- return false;
- if (F->hasInternalLinkage()) {
- for (auto *U : F->users())
- if (auto *I = dyn_cast<Instruction>(U)) {
- if (!I->getParent()->getParent()->doesNotRecurse())
- return false;
- } else {
- return false;
- }
- return setDoesNotRecurse(*F);
- }
- return false;
-}
-
-bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
+bool PostOrderFunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool Changed = false;
@@ -1040,19 +1007,100 @@ bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
Changed |= addNoAliasAttrs(SCCNodes);
Changed |= addNonNullAttrs(SCCNodes, *TLI);
}
-
- Changed |= addNoRecurseAttrs(SCC, Revisit);
+
+ Changed |= addNoRecurseAttrs(SCC);
return Changed;
}
-bool FunctionAttrs::doFinalization(CallGraph &CG) {
+namespace {
+/// A pass to do RPO deduction and propagation of function attributes.
+///
+/// This pass provides a general RPO or "top down" propagation of
+/// function attributes. For a few (rare) cases, we can deduce significantly
+/// more about function attributes by working in RPO, so this pass
+/// provides the compliment to the post-order pass above where the majority of
+/// deduction is performed.
+// FIXME: Currently there is no RPO CGSCC pass structure to slide into and so
+// this is a boring module pass, but eventually it should be an RPO CGSCC pass
+// when such infrastructure is available.
+struct ReversePostOrderFunctionAttrs : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ ReversePostOrderFunctionAttrs() : ModulePass(ID) {
+ initializeReversePostOrderFunctionAttrsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<CallGraphWrapperPass>();
+ }
+};
+}
+
+char ReversePostOrderFunctionAttrs::ID = 0;
+INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrs, "rpo-functionattrs",
+ "Deduce function attributes in RPO", false, false)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_END(ReversePostOrderFunctionAttrs, "rpo-functionattrs",
+ "Deduce function attributes in RPO", false, false)
+
+Pass *llvm::createReversePostOrderFunctionAttrsPass() {
+ return new ReversePostOrderFunctionAttrs();
+}
+
+static bool addNoRecurseAttrsTopDown(Function &F) {
+ // We check the preconditions for the function prior to calling this to avoid
+ // the cost of building up a reversible post-order list. We assert them here
+ // to make sure none of the invariants this relies on were violated.
+ assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!");
+ assert(!F.doesNotRecurse() &&
+ "This function has already been deduced as norecurs!");
+ assert(F.hasInternalLinkage() &&
+ "Can only do top-down deduction for internal linkage functions!");
+
+ // If F is internal and all of its uses are calls from a non-recursive
+ // functions, then none of its calls could in fact recurse without going
+ // through a function marked norecurse, and so we can mark this function too
+ // as norecurse. Note that the uses must actually be calls -- otherwise
+ // a pointer to this function could be returned from a norecurse function but
+ // this function could be recursively (indirectly) called. Note that this
+ // also detects if F is directly recursive as F is not yet marked as
+ // a norecurse function.
+ for (auto *U : F.users()) {
+ auto *I = dyn_cast<Instruction>(U);
+ if (!I)
+ return false;
+ CallSite CS(I);
+ if (!CS || !CS.getParent()->getParent()->doesNotRecurse())
+ return false;
+ }
+ return setDoesNotRecurse(F);
+}
+
+bool ReversePostOrderFunctionAttrs::runOnModule(Module &M) {
+ // We only have a post-order SCC traversal (because SCCs are inherently
+ // discovered in post-order), so we accumulate them in a vector and then walk
+ // it in reverse. This is simpler than using the RPO iterator infrastructure
+ // because we need to combine SCC detection and the PO walk of the call
+ // graph. We can also cheat egregiously because we're primarily interested in
+ // synthesizing norecurse and so we can only save the singular SCCs as SCCs
+ // with multiple functions in them will clearly be recursive.
+ auto &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+ SmallVector<Function *, 16> Worklist;
+ for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
+ if (I->size() != 1)
+ continue;
+
+ Function *F = I->front()->getFunction();
+ if (F && !F->isDeclaration() && !F->doesNotRecurse() &&
+ F->hasInternalLinkage())
+ Worklist.push_back(F);
+ }
+
bool Changed = false;
- // When iterating over SCCs we visit functions in a bottom-up fashion. Some of
- // the rules we have for identifying norecurse functions work best with a
- // top-down walk, so look again at all the functions we previously marked as
- // worth revisiting, in top-down order.
- for (auto &F : reverse(Revisit))
- if (F)
- Changed |= addNoRecurseAttrsTopDownOnly(cast<Function>((Value*)F));
+ for (auto *F : reverse(Worklist))
+ Changed |= addNoRecurseAttrsTopDown(*F);
+
return Changed;
}
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
index d8b677b..5e0df95 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -41,15 +41,16 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName,
LLVMContext &Context) {
SMDiagnostic Err;
DEBUG(dbgs() << "Loading '" << FileName << "'\n");
- std::unique_ptr<Module> Result = getLazyIRFileModule(FileName, Err, Context);
+ // Metadata isn't loaded or linked until after all functions are
+ // imported, after which it will be materialized and linked.
+ std::unique_ptr<Module> Result =
+ getLazyIRFileModule(FileName, Err, Context,
+ /* ShouldLazyLoadMetadata = */ true);
if (!Result) {
Err.print("function-import", errs());
return nullptr;
}
- Result->materializeMetadata();
- UpgradeDebugInfo(*Result);
-
return Result;
}
@@ -132,6 +133,8 @@ static void findExternalCalls(const Module &DestModule, Function &F,
// Ignore functions already present in the destination module
auto *SrcGV = DestModule.getNamedValue(ImportedName);
if (SrcGV) {
+ if (GlobalAlias *SGA = dyn_cast<GlobalAlias>(SrcGV))
+ SrcGV = SGA->getBaseObject();
assert(isa<Function>(SrcGV) && "Name collision during import");
if (!cast<Function>(SrcGV)->isDeclaration()) {
DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Ignoring "
@@ -324,6 +327,10 @@ bool FunctionImporter::importFunctions(Module &DestModule) {
ModuleToTempMDValsMap) {
// Load the specified source module.
auto &SrcModule = ModuleLoaderCache(SME.getKey());
+ // The modules were created with lazy metadata loading. Materialize it
+ // now, before linking it.
+ SrcModule.materializeMetadata();
+ UpgradeDebugInfo(SrcModule);
// Link in all necessary metadata from this module.
if (TheLinker.linkInMetadata(SrcModule, SME.getValue().get()))
@@ -408,14 +415,19 @@ public:
Index = IndexPtr.get();
}
+ // First we need to promote to global scope and rename any local values that
+ // are potentially exported to other modules.
+ if (renameModuleForThinLTO(M, Index)) {
+ errs() << "Error renaming module\n";
+ return false;
+ }
+
// Perform the import now.
auto ModuleLoader = [&M](StringRef Identifier) {
return loadFile(Identifier, M.getContext());
};
FunctionImporter Importer(*Index, ModuleLoader);
return Importer.importFunctions(M);
-
- return false;
}
};
} // anonymous namespace
diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
index 7ea6c08..89629cf0 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
@@ -28,7 +28,6 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeDAEPass(Registry);
initializeDAHPass(Registry);
initializeForceFunctionAttrsLegacyPassPass(Registry);
- initializeFunctionAttrsPass(Registry);
initializeGlobalDCEPass(Registry);
initializeGlobalOptPass(Registry);
initializeIPCPPass(Registry);
@@ -42,6 +41,8 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeLowerBitSetsPass(Registry);
initializeMergeFunctionsPass(Registry);
initializePartialInlinerPass(Registry);
+ initializePostOrderFunctionAttrsPass(Registry);
+ initializeReversePostOrderFunctionAttrsPass(Registry);
initializePruneEHPass(Registry);
initializeStripDeadPrototypesLegacyPassPass(Registry);
initializeStripSymbolsPass(Registry);
@@ -71,7 +72,7 @@ void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) {
}
void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createFunctionAttrsPass());
+ unwrap(PM)->add(createPostOrderFunctionAttrsPass());
}
void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) {
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index 8e4ad64..3c6a7bb 100644
--- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -38,7 +38,7 @@ namespace {
static char ID; // Pass identification, replacement for typeid
unsigned NumLoops;
- explicit LoopExtractor(unsigned numLoops = ~0)
+ explicit LoopExtractor(unsigned numLoops = ~0)
: LoopPass(ID), NumLoops(numLoops) {
initializeLoopExtractorPass(*PassRegistry::getPassRegistry());
}
@@ -143,7 +143,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &) {
Changed = true;
// After extraction, the loop is replaced by a function call, so
// we shouldn't try to run any more loop passes on it.
- LI.updateUnloop(L);
+ LI.markAsRemoved(L);
}
++NumExtracted;
}
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 9876efa..faada9c 100644
--- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -251,7 +251,7 @@ void PassManagerBuilder::populateModulePassManager(
Inliner = nullptr;
}
if (!DisableUnitAtATime)
- MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs
+ MPM.add(createPostOrderFunctionAttrsPass());
if (OptLevel > 2)
MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
@@ -346,6 +346,9 @@ void PassManagerBuilder::populateModulePassManager(
// we must insert a no-op module pass to reset the pass manager.
MPM.add(createBarrierNoopPass());
+ if (!DisableUnitAtATime)
+ MPM.add(createReversePostOrderFunctionAttrsPass());
+
if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) {
// Remove avail extern fns and globals definitions if we aren't
// compiling an object file for later LTO. For LTO we want to preserve
@@ -502,7 +505,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createIPSCCPPass());
// Now that we internalized some globals, see if we can hack on them!
- PM.add(createFunctionAttrsPass()); // Add norecurse if possible.
+ PM.add(createPostOrderFunctionAttrsPass());
+ PM.add(createReversePostOrderFunctionAttrsPass());
PM.add(createGlobalOptimizerPass());
// Promote any localized global vars.
PM.add(createPromoteMemoryToRegisterPass());
@@ -551,7 +555,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createScalarReplAggregatesPass());
// Run a few AA driven optimizations here and now, to cleanup the code.
- PM.add(createFunctionAttrsPass()); // Add nocapture.
+ PM.add(createPostOrderFunctionAttrsPass()); // Add nocapture.
PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
PM.add(createLICMPass()); // Hoist loop invariants.
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 7ad0efc..160792b 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -636,7 +636,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
// if pattern detected emit alternate sequence
if (OpX && OpY) {
BuilderTy::FastMathFlagGuard Guard(*Builder);
- Builder->SetFastMathFlags(Log2->getFastMathFlags());
+ Builder->setFastMathFlags(Log2->getFastMathFlags());
Log2->setArgOperand(0, OpY);
Value *FMulVal = Builder->CreateFMul(OpX, Log2);
Value *FSub = Builder->CreateFSub(FMulVal, OpX);
@@ -652,7 +652,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
bool IgnoreZeroSign = I.hasNoSignedZeros();
if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) {
BuilderTy::FastMathFlagGuard Guard(*Builder);
- Builder->SetFastMathFlags(I.getFastMathFlags());
+ Builder->setFastMathFlags(I.getFastMathFlags());
Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign);
Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign);
@@ -693,7 +693,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (Y) {
BuilderTy::FastMathFlagGuard Guard(*Builder);
- Builder->SetFastMathFlags(I.getFastMathFlags());
+ Builder->setFastMathFlags(I.getFastMathFlags());
Value *T = Builder->CreateFMul(Opnd1, Opnd1);
Value *R = Builder->CreateFMul(T, Y);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 776704d..51219bc 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -930,7 +930,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) {
FCmpInst::Predicate InvPred = FCI->getInversePredicate();
IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
- Builder->SetFastMathFlags(FCI->getFastMathFlags());
+ Builder->setFastMathFlags(FCI->getFastMathFlags());
Value *NewCond = Builder->CreateFCmp(InvPred, TrueVal, FalseVal,
FCI->getName() + ".inv");
@@ -973,7 +973,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) {
FCmpInst::Predicate InvPred = FCI->getInversePredicate();
IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
- Builder->SetFastMathFlags(FCI->getFastMathFlags());
+ Builder->setFastMathFlags(FCI->getFastMathFlags());
Value *NewCond = Builder->CreateFCmp(InvPred, FalseVal, TrueVal,
FCI->getName() + ".inv");
@@ -1082,7 +1082,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
} else {
IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
auto FMF = cast<FPMathOperator>(SI.getCondition())->getFastMathFlags();
- Builder->SetFastMathFlags(FMF);
+ Builder->setFastMathFlags(FMF);
Cmp = Builder->CreateFCmp(Pred, LHS, RHS);
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 54a9fbd..5cde31a 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -384,23 +384,20 @@ static void replaceExtractElements(InsertElementInst *InsElt,
ConstantVector::get(ExtendMask));
// Insert the new shuffle after the vector operand of the extract is defined
- // or at the start of the basic block, so any subsequent extracts can use it.
- bool ReplaceAllExtUsers;
- if (auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp)) {
+ // (as long as it's not a PHI) or at the start of the basic block of the
+ // extract, so any subsequent extracts in the same basic block can use it.
+ // TODO: Insert before the earliest ExtractElementInst that is replaced.
+ auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
+ if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
WideVec->insertAfter(ExtVecOpInst);
- ReplaceAllExtUsers = true;
- } else {
- // TODO: Insert at start of function, so it's always safe to replace all?
+ else
IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
- ReplaceAllExtUsers = false;
- }
// Replace extracts from the original narrow vector with extracts from the new
// wide vector.
for (User *U : ExtVecOp->users()) {
ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
- if (!OldExt ||
- (!ReplaceAllExtUsers && OldExt->getParent() != WideVec->getParent()))
+ if (!OldExt || OldExt->getParent() != WideVec->getParent())
continue;
auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
NewExt->insertAfter(WideVec);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 51ff95d..28483e7 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -93,8 +93,8 @@ private:
/// Replace instrprof_increment with an increment of the appropriate value.
void lowerIncrement(InstrProfIncrementInst *Inc);
- /// Set up the section and uses for coverage data and its references.
- void lowerCoverageData(GlobalVariable *CoverageData);
+ /// Force emitting of name vars for unused functions.
+ void lowerCoverageData(GlobalVariable *CoverageNamesVar);
/// Get the region counters for an increment, creating them if necessary.
///
@@ -156,9 +156,9 @@ bool InstrProfiling::runOnModule(Module &M) {
}
}
- if (GlobalVariable *Coverage =
- M.getNamedGlobal(getCoverageMappingVarName())) {
- lowerCoverageData(Coverage);
+ if (GlobalVariable *CoverageNamesVar =
+ M.getNamedGlobal(getCoverageNamesVarName())) {
+ lowerCoverageData(CoverageNamesVar);
MadeChange = true;
}
@@ -233,28 +233,16 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
Inc->eraseFromParent();
}
-void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) {
-
- Constant *Init = CoverageData->getInitializer();
- // We're expecting { [4 x 32], [n x { i8*, i32, i32 }], [m x i8] }
- // for some C. If not, the frontend's given us something broken.
- assert(Init->getNumOperands() == 3 && "bad number of fields in coverage map");
- assert(isa<ConstantArray>(Init->getAggregateElement(1)) &&
- "invalid function list in coverage map");
- ConstantArray *Records = cast<ConstantArray>(Init->getAggregateElement(1));
- for (unsigned I = 0, E = Records->getNumOperands(); I < E; ++I) {
- Constant *Record = Records->getOperand(I);
- Value *V = const_cast<Value *>(Record->getOperand(0))->stripPointerCasts();
+void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
+ ConstantArray *Names =
+ cast<ConstantArray>(CoverageNamesVar->getInitializer());
+ for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
+ Constant *NC = Names->getOperand(I);
+ Value *V = NC->stripPointerCasts();
assert(isa<GlobalVariable>(V) && "Missing reference to function name");
GlobalVariable *Name = cast<GlobalVariable>(V);
- // If we have region counters for this name, we've already handled it.
- auto It = ProfileDataMap.find(Name);
- if (It != ProfileDataMap.end())
- if (It->second.RegionCounters)
- continue;
-
// Move the name variable to the right section.
Name->setSection(getNameSection());
Name->setAlignment(1);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 5a7bce5..34aaa7f 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -692,7 +692,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
const DataLayout &DL = F.getParent()->getDataLayout();
unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
- if (isa<StructType>(Shadow->getType())) {
+ if (Shadow->getType()->isAggregateType()) {
paintOrigin(IRB, updateOrigin(Origin, IRB),
getOriginPtr(Addr, IRB, Alignment), StoreSize,
OriginAlignment);
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 087ce8a..dcdcfed 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -100,9 +100,9 @@ namespace {
std::unique_ptr<BranchProbabilityInfo> BPI;
bool HasProfileData;
#ifdef NDEBUG
- SmallPtrSet<BasicBlock*, 16> LoopHeaders;
+ SmallPtrSet<const BasicBlock *, 16> LoopHeaders;
#else
- SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
+ SmallSet<AssertingVH<const BasicBlock>, 16> LoopHeaders;
#endif
DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;
@@ -163,6 +163,7 @@ namespace {
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
+ bool TryToUnfoldSelectInCurrBB(BasicBlock *BB);
private:
BasicBlock *SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
@@ -210,11 +211,12 @@ bool JumpThreading::runOnFunction(Function &F) {
// we will loop forever. We take care of this issue by not jump threading for
// back edges. This works for normal cases but not for unreachable blocks as
// they may have cycle with no back edge.
- removeUnreachableBlocks(F);
+ bool EverChanged = false;
+ EverChanged |= removeUnreachableBlocks(F, LVI);
FindLoopHeaders(F);
- bool Changed, EverChanged = false;
+ bool Changed;
do {
Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
@@ -363,8 +365,8 @@ void JumpThreading::FindLoopHeaders(Function &F) {
SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
FindFunctionBackedges(F, Edges);
- for (unsigned i = 0, e = Edges.size(); i != e; ++i)
- LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
+ for (const auto &Edge : Edges)
+ LoopHeaders.insert(Edge.second);
}
/// getKnownConstant - Helper method to determine if we can thread over a
@@ -410,8 +412,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// If V is a constant, then it is known in all predecessors.
if (Constant *KC = getKnownConstant(V, Preference)) {
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- Result.push_back(std::make_pair(KC, *PI));
+ for (BasicBlock *Pred : predecessors(BB))
+ Result.push_back(std::make_pair(KC, Pred));
return true;
}
@@ -434,8 +436,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
// Perhaps getConstantOnEdge should be smart enough to do this?
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(BB)) {
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
@@ -491,22 +492,17 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// Scan for the sentinel. If we find an undef, force it to the
// interesting value: x|undef -> true and x&undef -> false.
- for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
- if (LHSVals[i].first == InterestingVal ||
- isa<UndefValue>(LHSVals[i].first)) {
- Result.push_back(LHSVals[i]);
- Result.back().first = InterestingVal;
- LHSKnownBBs.insert(LHSVals[i].second);
+ for (const auto &LHSVal : LHSVals)
+ if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
+ Result.emplace_back(InterestingVal, LHSVal.second);
+ LHSKnownBBs.insert(LHSVal.second);
}
- for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
- if (RHSVals[i].first == InterestingVal ||
- isa<UndefValue>(RHSVals[i].first)) {
+ for (const auto &RHSVal : RHSVals)
+ if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
// If we already inferred a value for this block on the LHS, don't
// re-add it.
- if (!LHSKnownBBs.count(RHSVals[i].second)) {
- Result.push_back(RHSVals[i]);
- Result.back().first = InterestingVal;
- }
+ if (!LHSKnownBBs.count(RHSVal.second))
+ Result.emplace_back(InterestingVal, RHSVal.second);
}
return !Result.empty();
@@ -522,8 +518,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
return false;
// Invert the known values.
- for (unsigned i = 0, e = Result.size(); i != e; ++i)
- Result[i].first = ConstantExpr::getNot(Result[i].first);
+ for (auto &R : Result)
+ R.first = ConstantExpr::getNot(R.first);
return true;
}
@@ -538,12 +534,12 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
WantInteger, CxtI);
// Try to use constant folding to simplify the binary operator.
- for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
- Constant *V = LHSVals[i].first;
+ for (const auto &LHSVal : LHSVals) {
+ Constant *V = LHSVal.first;
Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
if (Constant *KC = getKnownConstant(Folded, WantInteger))
- Result.push_back(std::make_pair(KC, LHSVals[i].second));
+ Result.push_back(std::make_pair(KC, LHSVal.second));
}
}
@@ -591,8 +587,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) {
Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(BB)) {
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
LazyValueInfo::Tristate Res =
@@ -615,12 +610,12 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
WantInteger, CxtI);
- for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
- Constant *V = LHSVals[i].first;
+ for (const auto &LHSVal : LHSVals) {
+ Constant *V = LHSVal.first;
Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
V, CmpConst);
if (Constant *KC = getKnownConstant(Folded, WantInteger))
- Result.push_back(std::make_pair(KC, LHSVals[i].second));
+ Result.push_back(std::make_pair(KC, LHSVal.second));
}
return !Result.empty();
@@ -637,8 +632,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
if ((TrueVal || FalseVal) &&
ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
WantInteger, CxtI)) {
- for (unsigned i = 0, e = Conds.size(); i != e; ++i) {
- Constant *Cond = Conds[i].first;
+ for (auto &C : Conds) {
+ Constant *Cond = C.first;
// Figure out what value to use for the condition.
bool KnownCond;
@@ -655,7 +650,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// See if the select has a known constant value for this predecessor.
if (Constant *Val = KnownCond ? TrueVal : FalseVal)
- Result.push_back(std::make_pair(Val, Conds[i].second));
+ Result.push_back(std::make_pair(Val, C.second));
}
return !Result.empty();
@@ -665,8 +660,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// If all else fails, see if LVI can figure out a constant value for us.
Constant *CI = LVI->getConstant(V, BB, CxtI);
if (Constant *KC = getKnownConstant(CI, Preference)) {
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- Result.push_back(std::make_pair(KC, *PI));
+ for (BasicBlock *Pred : predecessors(BB))
+ Result.push_back(std::make_pair(KC, Pred));
}
return !Result.empty();
@@ -736,6 +731,9 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
}
}
+ if (TryToUnfoldSelectInCurrBB(BB))
+ return true;
+
// What kind of constant we're looking for.
ConstantPreference Preference = WantInteger;
@@ -988,10 +986,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// If we got here, the loaded value is transparent through to the start of the
// block. Check to see if it is available in any of the predecessor blocks.
- for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
- PI != PE; ++PI) {
- BasicBlock *PredBB = *PI;
-
+ for (BasicBlock *PredBB : predecessors(LoadBB)) {
// If we already scanned this predecessor, skip it.
if (!PredsScanned.insert(PredBB).second)
continue;
@@ -1038,13 +1033,11 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
SmallVector<BasicBlock*, 8> PredsToSplit;
SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
- for (unsigned i = 0, e = AvailablePreds.size(); i != e; ++i)
- AvailablePredSet.insert(AvailablePreds[i].first);
+ for (const auto &AvailablePred : AvailablePreds)
+ AvailablePredSet.insert(AvailablePred.first);
// Add all the unavailable predecessors to the PredsToSplit list.
- for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
- PI != PE; ++PI) {
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(LoadBB)) {
// If the predecessor is an indirect goto, we can't split the edge.
if (isa<IndirectBrInst>(P->getTerminator()))
return false;
@@ -1129,9 +1122,9 @@ FindMostPopularDest(BasicBlock *BB,
// blocks with known and real destinations to threading undef. We'll handle
// them later if interesting.
DenseMap<BasicBlock*, unsigned> DestPopularity;
- for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
- if (PredToDestList[i].second)
- DestPopularity[PredToDestList[i].second]++;
+ for (const auto &PredToDest : PredToDestList)
+ if (PredToDest.second)
+ DestPopularity[PredToDest.second]++;
// Find the most popular dest.
DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
@@ -1194,10 +1187,10 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
"ComputeValueKnownInPredecessors returned true with no values");
DEBUG(dbgs() << "IN BB: " << *BB;
- for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
+ for (const auto &PredValue : PredValues) {
dbgs() << " BB '" << BB->getName() << "': FOUND condition = "
- << *PredValues[i].first
- << " for pred '" << PredValues[i].second->getName() << "'.\n";
+ << *PredValue.first
+ << " for pred '" << PredValue.second->getName() << "'.\n";
});
// Decide what we want to thread through. Convert our list of known values to
@@ -1210,8 +1203,8 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
BasicBlock *OnlyDest = nullptr;
BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
- for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
- BasicBlock *Pred = PredValues[i].second;
+ for (const auto &PredValue : PredValues) {
+ BasicBlock *Pred = PredValue.second;
if (!SeenPreds.insert(Pred).second)
continue; // Duplicate predecessor entry.
@@ -1220,7 +1213,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
if (isa<IndirectBrInst>(Pred->getTerminator()))
continue;
- Constant *Val = PredValues[i].first;
+ Constant *Val = PredValue.first;
BasicBlock *DestBB;
if (isa<UndefValue>(Val))
@@ -1260,16 +1253,15 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
// Now that we know what the most popular destination is, factor all
// predecessors that will jump to it into a single predecessor.
SmallVector<BasicBlock*, 16> PredsToFactor;
- for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
- if (PredToDestList[i].second == MostPopularDest) {
- BasicBlock *Pred = PredToDestList[i].first;
+ for (const auto &PredToDest : PredToDestList)
+ if (PredToDest.second == MostPopularDest) {
+ BasicBlock *Pred = PredToDest.first;
// This predecessor may be a switch or something else that has multiple
// edges to the block. Factor each of these edges by listing them
// according to # occurrences in PredsToFactor.
- TerminatorInst *PredTI = Pred->getTerminator();
- for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i)
- if (PredTI->getSuccessor(i) == BB)
+ for (BasicBlock *Succ : successors(Pred))
+ if (Succ == BB)
PredsToFactor.push_back(Pred);
}
@@ -1366,11 +1358,11 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// Scan the information to see which is most popular: true or false. The
// predecessors can be of the set true, false, or undef.
unsigned NumTrue = 0, NumFalse = 0;
- for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
- if (isa<UndefValue>(XorOpValues[i].first))
+ for (const auto &XorOpValue : XorOpValues) {
+ if (isa<UndefValue>(XorOpValue.first))
// Ignore undefs for the count.
continue;
- if (cast<ConstantInt>(XorOpValues[i].first)->isZero())
+ if (cast<ConstantInt>(XorOpValue.first)->isZero())
++NumFalse;
else
++NumTrue;
@@ -1386,12 +1378,11 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// Collect all of the blocks that this can be folded into so that we can
// factor this once and clone it once.
SmallVector<BasicBlock*, 8> BlocksToFoldInto;
- for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
- if (XorOpValues[i].first != SplitVal &&
- !isa<UndefValue>(XorOpValues[i].first))
+ for (const auto &XorOpValue : XorOpValues) {
+ if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
continue;
- BlocksToFoldInto.push_back(XorOpValues[i].second);
+ BlocksToFoldInto.push_back(XorOpValue.second);
}
// If we inferred a value for all of the predecessors, then duplication won't
@@ -1543,10 +1534,10 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// PHI insertion, of which we are prepared to do, clean these up now.
SSAUpdater SSAUpdate;
SmallVector<Use*, 16> UsesToRename;
- for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ for (Instruction &I : *BB) {
// Scan all uses of this instruction to see if it is used outside of its
// block, and if so, record them in UsesToRename.
- for (Use &U : I->uses()) {
+ for (Use &U : I.uses()) {
Instruction *User = cast<Instruction>(U.getUser());
if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
if (UserPN->getIncomingBlock(U) == BB)
@@ -1561,14 +1552,14 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
if (UsesToRename.empty())
continue;
- DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
- SSAUpdate.Initialize(I->getType(), I->getName());
- SSAUpdate.AddAvailableValue(BB, &*I);
- SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&*I]);
+ SSAUpdate.Initialize(I.getType(), I.getName());
+ SSAUpdate.AddAvailableValue(BB, &I);
+ SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
@@ -1644,10 +1635,10 @@ void JumpThreading::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
// Collect updated outgoing edges' frequencies from BB and use them to update
// edge probabilities.
SmallVector<uint64_t, 4> BBSuccFreq;
- for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
- auto SuccFreq = (*I == SuccBB)
+ for (BasicBlock *Succ : successors(BB)) {
+ auto SuccFreq = (Succ == SuccBB)
? BB2SuccBBFreq - NewBBFreq
- : BBOrigFreq * BPI->getEdgeProbability(BB, *I);
+ : BBOrigFreq * BPI->getEdgeProbability(BB, Succ);
BBSuccFreq.push_back(SuccFreq.getFrequency());
}
@@ -1783,10 +1774,10 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// PHI insertion, of which we are prepared to do, clean these up now.
SSAUpdater SSAUpdate;
SmallVector<Use*, 16> UsesToRename;
- for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ for (Instruction &I : *BB) {
// Scan all uses of this instruction to see if it is used outside of its
// block, and if so, record them in UsesToRename.
- for (Use &U : I->uses()) {
+ for (Use &U : I.uses()) {
Instruction *User = cast<Instruction>(U.getUser());
if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
if (UserPN->getIncomingBlock(U) == BB)
@@ -1801,14 +1792,14 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
if (UsesToRename.empty())
continue;
- DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
- SSAUpdate.Initialize(I->getType(), I->getName());
- SSAUpdate.AddAvailableValue(BB, &*I);
- SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&*I]);
+ SSAUpdate.Initialize(I.getType(), I.getName());
+ SSAUpdate.AddAvailableValue(BB, &I);
+ SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&I]);
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
@@ -1903,3 +1894,62 @@ bool JumpThreading::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
}
return false;
}
+
+/// TryToUnfoldSelectInCurrBB - Look for PHI/Select in the same BB of the form
+/// bb:
+/// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
+/// %s = select p, trueval, falseval
+///
+/// And expand the select into a branch structure. This later enables
+/// jump-threading over bb in this pass.
+///
+/// Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
+/// select if the associated PHI has at least one constant. If the unfolded
+/// select is not jump-threaded, it will be folded again in the later
+/// optimizations.
+bool JumpThreading::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
+ // If threading this would thread across a loop header, don't thread the edge.
+ // See the comments above FindLoopHeaders for justifications and caveats.
+ if (LoopHeaders.count(BB))
+ return false;
+
+ // Look for a Phi/Select pair in the same basic block. The Phi feeds the
+ // condition of the Select and at least one of the incoming values is a
+ // constant.
+ for (BasicBlock::iterator BI = BB->begin();
+ PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
+ unsigned NumPHIValues = PN->getNumIncomingValues();
+ if (NumPHIValues == 0 || !PN->hasOneUse())
+ continue;
+
+ SelectInst *SI = dyn_cast<SelectInst>(PN->user_back());
+ if (!SI || SI->getParent() != BB)
+ continue;
+
+ Value *Cond = SI->getCondition();
+ if (!Cond || Cond != PN || !Cond->getType()->isIntegerTy(1))
+ continue;
+
+ bool HasConst = false;
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ if (PN->getIncomingBlock(i) == BB)
+ return false;
+ if (isa<ConstantInt>(PN->getIncomingValue(i)))
+ HasConst = true;
+ }
+
+ if (HasConst) {
+ // Expand the select.
+ TerminatorInst *Term =
+ SplitBlockAndInsertIfThen(SI->getCondition(), SI, false);
+ PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
+ NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
+ NewPN->addIncoming(SI->getFalseValue(), BB);
+ SI->replaceAllUsesWith(NewPN);
+ SI->eraseFromParent();
+ return true;
+ }
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index e01e23f..8923ff7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -203,9 +203,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
CurAST = new AliasSetTracker(*AA);
// Collect Alias info from subloops.
- for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
- LoopItr != LoopItrE; ++LoopItr) {
- Loop *InnerL = *LoopItr;
+ for (Loop *InnerL : L->getSubLoops()) {
AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL];
assert(InnerAST && "Where is my AST?");
@@ -227,9 +225,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Because subloops have already been incorporated into AST, we skip blocks in
// subloops.
//
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
+ for (BasicBlock *BB : L->blocks()) {
if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops.
CurAST->add(*BB); // Incorporate the specified basic block
}
@@ -263,9 +259,8 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
PredIteratorCache PIC;
// Loop over all of the alias sets in the tracker object.
- for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
- I != E; ++I)
- Changed |= promoteLoopAccessesToScalars(*I, ExitBlocks, InsertPts,
+ for (AliasSet &AS : *CurAST)
+ Changed |= promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts,
PIC, LI, DT, CurLoop,
CurAST, &SafetyInfo);
@@ -324,9 +319,9 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// We are processing blocks in reverse dfo, so process children first.
const std::vector<DomTreeNode*> &Children = N->getChildren();
- for (unsigned i = 0, e = Children.size(); i != e; ++i)
- Changed |=
- sinkRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
+ for (DomTreeNode *Child : Children)
+ Changed |= sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
+
// Only need to process the contents of this block if it is not part of a
// subloop (which would already have been processed).
if (inSubLoop(BB,CurLoop,LI)) return Changed;
@@ -407,9 +402,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
}
const std::vector<DomTreeNode*> &Children = N->getChildren();
- for (unsigned i = 0, e = Children.size(); i != e; ++i)
- Changed |=
- hoistRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
+ for (DomTreeNode *Child : Children)
+ Changed |= hoistRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
return Changed;
}
@@ -499,9 +493,7 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
// If this call only reads from memory and there are no writes to memory
// in the loop, we can hoist or sink the call as appropriate.
bool FoundMod = false;
- for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
- I != E; ++I) {
- AliasSet &AS = *I;
+ for (AliasSet &AS : *CurAST) {
if (!AS.isForwardingAliasSet() && AS.isMod()) {
FoundMod = true;
break;
@@ -783,8 +775,8 @@ static bool isGuaranteedToExecute(const Instruction &Inst,
CurLoop->getExitBlocks(ExitBlocks);
// Verify that the block dominates each of the exit blocks of the loop.
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (!DT->dominates(Inst.getParent(), ExitBlocks[i]))
+ for (BasicBlock *ExitBlock : ExitBlocks)
+ if (!DT->dominates(Inst.getParent(), ExitBlock))
return false;
// As a degenerate case, if the loop is statically infinite then we haven't
@@ -951,17 +943,17 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
// If there is an non-load/store instruction in the loop, we can't promote
// it.
- if (const LoadInst *load = dyn_cast<LoadInst>(UI)) {
- assert(!load->isVolatile() && "AST broken");
- if (!load->isSimple())
+ if (const LoadInst *Load = dyn_cast<LoadInst>(UI)) {
+ assert(!Load->isVolatile() && "AST broken");
+ if (!Load->isSimple())
return Changed;
- } else if (const StoreInst *store = dyn_cast<StoreInst>(UI)) {
+ } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
if (UI->getOperand(1) != ASIV)
continue;
- assert(!store->isVolatile() && "AST broken");
- if (!store->isSimple())
+ assert(!Store->isVolatile() && "AST broken");
+ if (!Store->isSimple())
return Changed;
// Don't sink stores from loops without dedicated block exits. Exits
// containing indirect branches are not transformed by loop simplify,
@@ -979,7 +971,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
// restrictive (and performant) alignment and if we are sure this
// instruction will be executed, update the alignment.
// Larger is better, with the exception of 0 being the best alignment.
- unsigned InstAlignment = store->getAlignment();
+ unsigned InstAlignment = Store->getAlignment();
if ((InstAlignment > Alignment || InstAlignment == 0) && Alignment != 0)
if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) {
GuaranteedToExecute = true;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index bc00ff3..7b1940b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -245,7 +245,7 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &) {
loopInfo.removeBlock(BB);
// The last step is to update LoopInfo now that we've eliminated this loop.
- loopInfo.updateUnloop(L);
+ loopInfo.markAsRemoved(L);
Changed = true;
++NumDeleted;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 56ae5c0..ecef6db 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -39,16 +39,16 @@ using namespace llvm;
#define DEBUG_TYPE "loop-unroll"
static cl::opt<unsigned>
- UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
+ UnrollThreshold("unroll-threshold", cl::Hidden,
cl::desc("The baseline cost threshold for loop unrolling"));
static cl::opt<unsigned> UnrollPercentDynamicCostSavedThreshold(
- "unroll-percent-dynamic-cost-saved-threshold", cl::init(20), cl::Hidden,
+ "unroll-percent-dynamic-cost-saved-threshold", cl::Hidden,
cl::desc("The percentage of estimated dynamic cost which must be saved by "
"unrolling to allow unrolling up to the max threshold."));
static cl::opt<unsigned> UnrollDynamicCostSavingsDiscount(
- "unroll-dynamic-cost-savings-discount", cl::init(2000), cl::Hidden,
+ "unroll-dynamic-cost-savings-discount", cl::Hidden,
cl::desc("This is the amount discounted from the total unroll cost when "
"the unrolled form has a high dynamic cost savings (triggered by "
"the '-unroll-perecent-dynamic-cost-saved-threshold' flag)."));
@@ -59,17 +59,17 @@ static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
"iterations when checking full unroll profitability"));
static cl::opt<unsigned>
-UnrollCount("unroll-count", cl::init(0), cl::Hidden,
+UnrollCount("unroll-count", cl::Hidden,
cl::desc("Use this unroll count for all loops including those with "
"unroll_count pragma values, for testing purposes"));
static cl::opt<bool>
-UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
+UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
cl::desc("Allows loops to be partially unrolled until "
"-unroll-threshold loop size is reached."));
static cl::opt<bool>
-UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden,
+UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
cl::desc("Unroll loops with run-time trip counts"));
static cl::opt<unsigned>
@@ -77,182 +77,95 @@ PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden
cl::desc("Unrolled size limit for loops with an unroll(full) or "
"unroll_count pragma."));
-namespace {
- class LoopUnroll : public LoopPass {
- public:
- static char ID; // Pass ID, replacement for typeid
- LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) {
- CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
- CurrentPercentDynamicCostSavedThreshold =
- UnrollPercentDynamicCostSavedThreshold;
- CurrentDynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
- CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
- CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
- CurrentRuntime = (R == -1) ? UnrollRuntime : (bool)R;
-
- UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
- UserPercentDynamicCostSavedThreshold =
- (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0);
- UserDynamicCostSavingsDiscount =
- (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0);
- UserAllowPartial = (P != -1) ||
- (UnrollAllowPartial.getNumOccurrences() > 0);
- UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0);
- UserCount = (C != -1) || (UnrollCount.getNumOccurrences() > 0);
-
- initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
- }
- /// A magic value for use with the Threshold parameter to indicate
- /// that the loop unroll should be performed regardless of how much
- /// code expansion would result.
- static const unsigned NoThreshold = UINT_MAX;
-
- // Threshold to use when optsize is specified (and there is no
- // explicit -unroll-threshold).
- static const unsigned OptSizeUnrollThreshold = 50;
-
- // Default unroll count for loops with run-time trip count if
- // -unroll-count is not set
- static const unsigned UnrollRuntimeCount = 8;
-
- unsigned CurrentCount;
- unsigned CurrentThreshold;
- unsigned CurrentPercentDynamicCostSavedThreshold;
- unsigned CurrentDynamicCostSavingsDiscount;
- bool CurrentAllowPartial;
- bool CurrentRuntime;
-
- // Flags for whether the 'current' settings are user-specified.
- bool UserCount;
- bool UserThreshold;
- bool UserPercentDynamicCostSavedThreshold;
- bool UserDynamicCostSavingsDiscount;
- bool UserAllowPartial;
- bool UserRuntime;
-
- bool runOnLoop(Loop *L, LPPassManager &) override;
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG...
- ///
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addPreservedID(LCSSAID);
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addPreserved<ScalarEvolutionWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
- // If loop unroll does not preserve dom info then LCSSA pass on next
- // loop will receive invalid dom info.
- // For now, recreate dom info, if loop is unrolled.
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
+/// A magic value for use with the Threshold parameter to indicate
+/// that the loop unroll should be performed regardless of how much
+/// code expansion would result.
+static const unsigned NoThreshold = UINT_MAX;
- // Fill in the UnrollingPreferences parameter with values from the
- // TargetTransformationInfo.
- void getUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI,
- TargetTransformInfo::UnrollingPreferences &UP) {
- UP.Threshold = CurrentThreshold;
- UP.PercentDynamicCostSavedThreshold =
- CurrentPercentDynamicCostSavedThreshold;
- UP.DynamicCostSavingsDiscount = CurrentDynamicCostSavingsDiscount;
- UP.OptSizeThreshold = OptSizeUnrollThreshold;
- UP.PartialThreshold = CurrentThreshold;
- UP.PartialOptSizeThreshold = OptSizeUnrollThreshold;
- UP.Count = CurrentCount;
- UP.MaxCount = UINT_MAX;
- UP.Partial = CurrentAllowPartial;
- UP.Runtime = CurrentRuntime;
- UP.AllowExpensiveTripCount = false;
- TTI.getUnrollingPreferences(L, UP);
- }
+/// Default unroll count for loops with run-time trip count if
+/// -unroll-count is not set
+static const unsigned DefaultUnrollRuntimeCount = 8;
- // Select and return an unroll count based on parameters from
- // user, unroll preferences, unroll pragmas, or a heuristic.
- // SetExplicitly is set to true if the unroll count is is set by
- // the user or a pragma rather than selected heuristically.
- unsigned
- selectUnrollCount(const Loop *L, unsigned TripCount, bool PragmaFullUnroll,
- unsigned PragmaCount,
- const TargetTransformInfo::UnrollingPreferences &UP,
- bool &SetExplicitly);
-
- // Select threshold values used to limit unrolling based on a
- // total unrolled size. Parameters Threshold and PartialThreshold
- // are set to the maximum unrolled size for fully and partially
- // unrolled loops respectively.
- void selectThresholds(const Loop *L, bool UsePragmaThreshold,
- const TargetTransformInfo::UnrollingPreferences &UP,
- unsigned &Threshold, unsigned &PartialThreshold,
- unsigned &PercentDynamicCostSavedThreshold,
- unsigned &DynamicCostSavingsDiscount) {
- // Determine the current unrolling threshold. While this is
- // normally set from UnrollThreshold, it is overridden to a
- // smaller value if the current function is marked as
- // optimize-for-size, and the unroll threshold was not user
- // specified.
- Threshold = UserThreshold ? CurrentThreshold : UP.Threshold;
- PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold;
- PercentDynamicCostSavedThreshold =
- UserPercentDynamicCostSavedThreshold
- ? CurrentPercentDynamicCostSavedThreshold
- : UP.PercentDynamicCostSavedThreshold;
- DynamicCostSavingsDiscount = UserDynamicCostSavingsDiscount
- ? CurrentDynamicCostSavingsDiscount
- : UP.DynamicCostSavingsDiscount;
-
- if (!UserThreshold &&
- // FIXME: Use Function::optForSize().
- L->getHeader()->getParent()->hasFnAttribute(
- Attribute::OptimizeForSize)) {
- Threshold = UP.OptSizeThreshold;
- PartialThreshold = UP.PartialOptSizeThreshold;
- }
- if (UsePragmaThreshold) {
- // If the loop has an unrolling pragma, we want to be more
- // aggressive with unrolling limits. Set thresholds to at
- // least the PragmaTheshold value which is larger than the
- // default limits.
- if (Threshold != NoThreshold)
- Threshold = std::max<unsigned>(Threshold, PragmaUnrollThreshold);
- if (PartialThreshold != NoThreshold)
- PartialThreshold =
- std::max<unsigned>(PartialThreshold, PragmaUnrollThreshold);
- }
- }
- bool canUnrollCompletely(Loop *L, unsigned Threshold,
- unsigned PercentDynamicCostSavedThreshold,
- unsigned DynamicCostSavingsDiscount,
- uint64_t UnrolledCost, uint64_t RolledDynamicCost);
- };
-}
+/// Gather the various unrolling parameters based on the defaults, compiler
+/// flags, TTI overrides, pragmas, and user specified parameters.
+static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
+ Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
+ Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
+ Optional<bool> UserRuntime, unsigned PragmaCount, bool PragmaFullUnroll,
+ bool PragmaEnableUnroll, unsigned TripCount) {
+ TargetTransformInfo::UnrollingPreferences UP;
-char LoopUnroll::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+ // Set up the defaults
+ UP.Threshold = 150;
+ UP.PercentDynamicCostSavedThreshold = 20;
+ UP.DynamicCostSavingsDiscount = 2000;
+ UP.OptSizeThreshold = 50;
+ UP.PartialThreshold = UP.Threshold;
+ UP.PartialOptSizeThreshold = UP.OptSizeThreshold;
+ UP.Count = 0;
+ UP.MaxCount = UINT_MAX;
+ UP.Partial = false;
+ UP.Runtime = false;
+ UP.AllowExpensiveTripCount = false;
+
+ // Override with any target specific settings
+ TTI.getUnrollingPreferences(L, UP);
+
+ // Apply size attributes
+ if (L->getHeader()->getParent()->optForSize()) {
+ UP.Threshold = UP.OptSizeThreshold;
+ UP.PartialThreshold = UP.PartialOptSizeThreshold;
+ }
-Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
- int Runtime) {
- return new LoopUnroll(Threshold, Count, AllowPartial, Runtime);
-}
+ // Apply unroll count pragmas
+ if (PragmaCount)
+ UP.Count = PragmaCount;
+ else if (PragmaFullUnroll)
+ UP.Count = TripCount;
-Pass *llvm::createSimpleLoopUnrollPass() {
- return llvm::createLoopUnrollPass(-1, -1, 0, 0);
+ // Apply any user values specified by cl::opt
+ if (UnrollThreshold.getNumOccurrences() > 0) {
+ UP.Threshold = UnrollThreshold;
+ UP.PartialThreshold = UnrollThreshold;
+ }
+ if (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0)
+ UP.PercentDynamicCostSavedThreshold =
+ UnrollPercentDynamicCostSavedThreshold;
+ if (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0)
+ UP.DynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
+ if (UnrollCount.getNumOccurrences() > 0)
+ UP.Count = UnrollCount;
+ if (UnrollAllowPartial.getNumOccurrences() > 0)
+ UP.Partial = UnrollAllowPartial;
+ if (UnrollRuntime.getNumOccurrences() > 0)
+ UP.Runtime = UnrollRuntime;
+
+ // Apply user values provided by argument
+ if (UserThreshold.hasValue()) {
+ UP.Threshold = *UserThreshold;
+ UP.PartialThreshold = *UserThreshold;
+ }
+ if (UserCount.hasValue())
+ UP.Count = *UserCount;
+ if (UserAllowPartial.hasValue())
+ UP.Partial = *UserAllowPartial;
+ if (UserRuntime.hasValue())
+ UP.Runtime = *UserRuntime;
+
+ if (PragmaCount > 0 ||
+ ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0)) {
+ // If the loop has an unrolling pragma, we want to be more aggressive with
+ // unrolling limits. Set thresholds to at least the PragmaTheshold value
+ // which is larger than the default limits.
+ if (UP.Threshold != NoThreshold)
+ UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
+ if (UP.PartialThreshold != NoThreshold)
+ UP.PartialThreshold =
+ std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
+ }
+
+ return UP;
}
namespace {
@@ -793,12 +706,11 @@ static void SetLoopAlreadyUnrolled(Loop *L) {
L->setLoopID(NewLoopID);
}
-bool LoopUnroll::canUnrollCompletely(Loop *L, unsigned Threshold,
- unsigned PercentDynamicCostSavedThreshold,
- unsigned DynamicCostSavingsDiscount,
- uint64_t UnrolledCost,
- uint64_t RolledDynamicCost) {
-
+static bool canUnrollCompletely(Loop *L, unsigned Threshold,
+ unsigned PercentDynamicCostSavedThreshold,
+ unsigned DynamicCostSavingsDiscount,
+ uint64_t UnrolledCost,
+ uint64_t RolledDynamicCost) {
if (Threshold == NoThreshold) {
DEBUG(dbgs() << " Can fully unroll, because no threshold is set.\n");
return true;
@@ -846,60 +758,13 @@ bool LoopUnroll::canUnrollCompletely(Loop *L, unsigned Threshold,
return false;
}
-unsigned LoopUnroll::selectUnrollCount(
- const Loop *L, unsigned TripCount, bool PragmaFullUnroll,
- unsigned PragmaCount, const TargetTransformInfo::UnrollingPreferences &UP,
- bool &SetExplicitly) {
- SetExplicitly = true;
-
- // User-specified count (either as a command-line option or
- // constructor parameter) has highest precedence.
- unsigned Count = UserCount ? CurrentCount : 0;
-
- // If there is no user-specified count, unroll pragmas have the next
- // highest precedence.
- if (Count == 0) {
- if (PragmaCount) {
- Count = PragmaCount;
- } else if (PragmaFullUnroll) {
- Count = TripCount;
- }
- }
-
- if (Count == 0)
- Count = UP.Count;
-
- if (Count == 0) {
- SetExplicitly = false;
- if (TripCount == 0)
- // Runtime trip count.
- Count = UnrollRuntimeCount;
- else
- // Conservative heuristic: if we know the trip count, see if we can
- // completely unroll (subject to the threshold, checked below); otherwise
- // try to find greatest modulo of the trip count which is still under
- // threshold value.
- Count = TripCount;
- }
- if (TripCount && Count > TripCount)
- return TripCount;
- return Count;
-}
-
-bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
- if (skipOptnoneFunction(L))
- return false;
-
- Function &F = *L->getHeader()->getParent();
-
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- const TargetTransformInfo &TTI =
- getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
-
+static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
+ ScalarEvolution *SE, const TargetTransformInfo &TTI,
+ AssumptionCache &AC, bool PreserveLCSSA,
+ Optional<unsigned> ProvidedCount,
+ Optional<unsigned> ProvidedThreshold,
+ Optional<bool> ProvidedAllowPartial,
+ Optional<bool> ProvidedRuntime) {
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
<< "] Loop %" << Header->getName() << "\n");
@@ -912,9 +777,6 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
unsigned PragmaCount = UnrollCountPragmaValue(L);
bool HasPragma = PragmaFullUnroll || PragmaEnableUnroll || PragmaCount > 0;
- TargetTransformInfo::UnrollingPreferences UP;
- getUnrollingPreferences(L, TTI, UP);
-
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
unsigned TripMultiple = 1;
@@ -929,11 +791,18 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
}
- // Select an initial unroll count. This may be reduced later based
- // on size thresholds.
- bool CountSetExplicitly;
- unsigned Count = selectUnrollCount(L, TripCount, PragmaFullUnroll,
- PragmaCount, UP, CountSetExplicitly);
+ TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
+ L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
+ ProvidedRuntime, PragmaCount, PragmaFullUnroll, PragmaEnableUnroll,
+ TripCount);
+
+ unsigned Count = UP.Count;
+ bool CountSetExplicitly = Count != 0;
+ // Use a heuristic count if we didn't set anything explicitly.
+ if (!CountSetExplicitly)
+ Count = TripCount == 0 ? DefaultUnrollRuntimeCount : TripCount;
+ if (TripCount && Count > TripCount)
+ Count = TripCount;
unsigned NumInlineCandidates;
bool notDuplicatable;
@@ -955,21 +824,6 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
return false;
}
- unsigned Threshold, PartialThreshold;
- unsigned PercentDynamicCostSavedThreshold;
- unsigned DynamicCostSavingsDiscount;
- // Only use the high pragma threshold when we have a target unroll factor such
- // as with "#pragma unroll N" or a pragma indicating full unrolling and the
- // trip count is known. Otherwise we rely on the standard threshold to
- // heuristically select a reasonable unroll count.
- bool UsePragmaThreshold =
- PragmaCount > 0 ||
- ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0);
-
- selectThresholds(L, UsePragmaThreshold, UP, Threshold, PartialThreshold,
- PercentDynamicCostSavedThreshold,
- DynamicCostSavingsDiscount);
-
// Given Count, TripCount and thresholds determine the type of
// unrolling which is to be performed.
enum { Full = 0, Partial = 1, Runtime = 2 };
@@ -977,19 +831,20 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
if (TripCount && Count == TripCount) {
Unrolling = Partial;
// If the loop is really small, we don't need to run an expensive analysis.
- if (canUnrollCompletely(L, Threshold, 100, DynamicCostSavingsDiscount,
+ if (canUnrollCompletely(L, UP.Threshold, 100, UP.DynamicCostSavingsDiscount,
UnrolledSize, UnrolledSize)) {
Unrolling = Full;
} else {
// The loop isn't that small, but we still can fully unroll it if that
// helps to remove a significant number of instructions.
// To check that, run additional analysis on the loop.
- if (Optional<EstimatedUnrollCost> Cost =
- analyzeLoopUnrollCost(L, TripCount, DT, *SE, TTI,
- Threshold + DynamicCostSavingsDiscount))
- if (canUnrollCompletely(L, Threshold, PercentDynamicCostSavedThreshold,
- DynamicCostSavingsDiscount, Cost->UnrolledCost,
- Cost->RolledDynamicCost)) {
+ if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
+ L, TripCount, DT, *SE, TTI,
+ UP.Threshold + UP.DynamicCostSavingsDiscount))
+ if (canUnrollCompletely(L, UP.Threshold,
+ UP.PercentDynamicCostSavedThreshold,
+ UP.DynamicCostSavingsDiscount,
+ Cost->UnrolledCost, Cost->RolledDynamicCost)) {
Unrolling = Full;
}
}
@@ -1001,23 +856,22 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
// Reduce count based on the type of unrolling and the threshold values.
unsigned OriginalCount = Count;
- bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) ||
- (UserRuntime ? CurrentRuntime : UP.Runtime);
+ bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) || UP.Runtime;
// Don't unroll a runtime trip count loop with unroll full pragma.
if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) {
AllowRuntime = false;
}
if (Unrolling == Partial) {
- bool AllowPartial = PragmaEnableUnroll ||
- (UserAllowPartial ? CurrentAllowPartial : UP.Partial);
+ bool AllowPartial = PragmaEnableUnroll || UP.Partial;
if (!AllowPartial && !CountSetExplicitly) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
return false;
}
- if (PartialThreshold != NoThreshold && UnrolledSize > PartialThreshold) {
+ if (UP.PartialThreshold != NoThreshold &&
+ UnrolledSize > UP.PartialThreshold) {
// Reduce unroll count to be modulo of TripCount for partial unrolling.
- Count = (std::max(PartialThreshold, 3u)-2) / (LoopSize-2);
+ Count = (std::max(UP.PartialThreshold, 3u) - 2) / (LoopSize - 2);
while (Count != 0 && TripCount % Count != 0)
Count--;
}
@@ -1029,7 +883,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
}
// Reduce unroll count to be the largest power-of-two factor of
// the original count which satisfies the threshold limit.
- while (Count != 0 && UnrolledSize > PartialThreshold) {
+ while (Count != 0 && UnrolledSize > UP.PartialThreshold) {
Count >>= 1;
UnrolledSize = (LoopSize-2) * Count + 2;
}
@@ -1086,3 +940,91 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
return true;
}
+
+namespace {
+class LoopUnroll : public LoopPass {
+public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopUnroll(Optional<unsigned> Threshold = None,
+ Optional<unsigned> Count = None,
+ Optional<bool> AllowPartial = None, Optional<bool> Runtime = None)
+ : LoopPass(ID), ProvidedCount(Count), ProvidedThreshold(Threshold),
+ ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime) {
+ initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
+ }
+
+ Optional<unsigned> ProvidedCount;
+ Optional<unsigned> ProvidedThreshold;
+ Optional<bool> ProvidedAllowPartial;
+ Optional<bool> ProvidedRuntime;
+
+ bool runOnLoop(Loop *L, LPPassManager &) override {
+ if (skipOptnoneFunction(L))
+ return false;
+
+ Function &F = *L->getHeader()->getParent();
+
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ const TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
+ return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, PreserveLCSSA, ProvidedCount,
+ ProvidedThreshold, ProvidedAllowPartial,
+ ProvidedRuntime);
+ }
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG...
+ ///
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
+ // If loop unroll does not preserve dom info then LCSSA pass on next
+ // loop will receive invalid dom info.
+ // For now, recreate dom info, if loop is unrolled.
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+}
+
+char LoopUnroll::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+
+Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
+ int Runtime) {
+ // TODO: It would make more sense for this function to take the optionals
+ // directly, but that's dangerous since it would silently break out of tree
+ // callers.
+ return new LoopUnroll(Threshold == -1 ? None : Optional<unsigned>(Threshold),
+ Count == -1 ? None : Optional<unsigned>(Count),
+ AllowPartial == -1 ? None
+ : Optional<bool>(AllowPartial),
+ Runtime == -1 ? None : Optional<bool>(Runtime));
+}
+
+Pass *llvm::createSimpleLoopUnrollPass() {
+ return llvm::createLoopUnrollPass(-1, -1, 0, 0);
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 7354016..6b43b0f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -529,12 +529,13 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// We found an instruction that may write to the loaded memory.
// We can try to promote at this position instead of the store
- // position if nothing alias the store memory after this.
+ // position if nothing alias the store memory after this and the store
+ // destination is not in the range.
P = &*I;
for (; I != E; ++I) {
MemoryLocation StoreLoc = MemoryLocation::get(SI);
- if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
- DEBUG(dbgs() << "Alias " << *I << "\n");
+ if (&*I == SI->getOperand(1) ||
+ AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
P = nullptr;
break;
}
@@ -628,13 +629,39 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// Ensure that the value being stored is something that can be memset'able a
// byte at a time like "0" or "-1" or any width, as well as things like
// 0xA0A0A0A0 and 0.0.
- if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
+ auto *V = SI->getOperand(0);
+ if (Value *ByteVal = isBytewiseValue(V)) {
if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
ByteVal)) {
BBI = I->getIterator(); // Don't invalidate iterator.
return true;
}
+ // If we have an aggregate, we try to promote it to memset regardless
+ // of opportunity for merging as it can expose optimization opportunities
+ // in subsequent passes.
+ auto *T = V->getType();
+ if (T->isAggregateType()) {
+ uint64_t Size = DL.getTypeStoreSize(T);
+ unsigned Align = SI->getAlignment();
+ if (!Align)
+ Align = DL.getABITypeAlignment(T);
+ IRBuilder<> Builder(SI);
+ auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal,
+ Size, Align, SI->isVolatile());
+
+ DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
+
+ MD->removeInstruction(SI);
+ SI->eraseFromParent();
+ NumMemSetInfer++;
+
+ // Make sure we do not invalidate the iterator.
+ BBI = M->getIterator();
+ return true;
+ }
+ }
+
return false;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 28c610c..b56b355 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -499,7 +499,7 @@ static bool isGCSafepointPoll(Function &F) {
static bool shouldRewriteFunction(Function &F) {
// TODO: This should check the GCStrategy
if (F.hasGC()) {
- const char *FunctionGCName = F.getGC();
+ const auto &FunctionGCName = F.getGC();
const StringRef StatepointExampleName("statepoint-example");
const StringRef CoreCLRName("coreclr");
return (StatepointExampleName == FunctionGCName) ||
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 401a740..bcadd4e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -2155,7 +2155,8 @@ void Reassociate::OptimizeInst(Instruction *I) {
// During the initial run we will get to the root of the tree.
// But if we get here while we are redoing instructions, there is no
// guarantee that the root will be visited. So Redo later
- if (BO->user_back() != BO)
+ if (BO->user_back() != BO &&
+ BO->getParent() == BO->user_back()->getParent())
RedoInsts.insert(BO->user_back());
return;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 5d253be..d77d574 100644
--- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -78,6 +78,13 @@ static cl::opt<bool>
AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
cl::Hidden, cl::init(true));
+/// Should we split vectors of pointers into their individual elements? This
+/// is known to be buggy, but the alternate implementation isn't yet ready.
+/// This is purely to provide a debugging and dianostic hook until the vector
+/// split is replaced with vector relocations.
+static cl::opt<bool> UseVectorSplit("rs4gc-split-vector-values", cl::Hidden,
+ cl::init(true));
+
namespace {
struct RewriteStatepointsForGC : public ModulePass {
static char ID; // Pass identification, replacement for typeid
@@ -357,10 +364,6 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I);
/// particular element in 'I'.
static BaseDefiningValueResult
findBaseDefiningValueOfVector(Value *I) {
- assert(I->getType()->isVectorTy() &&
- cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
- "Illegal to ask for the base pointer of a non-pointer type");
-
// Each case parallels findBaseDefiningValue below, see that code for
// detailed motivation.
@@ -368,26 +371,10 @@ findBaseDefiningValueOfVector(Value *I) {
// An incoming argument to the function is a base pointer
return BaseDefiningValueResult(I, true);
- // We shouldn't see the address of a global as a vector value?
- assert(!isa<GlobalVariable>(I) &&
- "unexpected global variable found in base of vector");
-
- // inlining could possibly introduce phi node that contains
- // undef if callee has multiple returns
- if (isa<UndefValue>(I))
- // utterly meaningless, but useful for dealing with partially optimized
- // code.
+ if (isa<Constant>(I))
+ // Constant vectors consist only of constant pointers.
return BaseDefiningValueResult(I, true);
- // Due to inheritance, this must be _after_ the global variable and undef
- // checks
- if (Constant *Con = dyn_cast<Constant>(I)) {
- assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
- "order of checks wrong!");
- assert(Con->isNullValue() && "null is the only case which makes sense");
- return BaseDefiningValueResult(Con, true);
- }
-
if (isa<LoadInst>(I))
return BaseDefiningValueResult(I, true);
@@ -417,11 +404,11 @@ findBaseDefiningValueOfVector(Value *I) {
/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
/// from pointer to vector type or back.
static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
+ assert(I->getType()->isPtrOrPtrVectorTy() &&
+ "Illegal to ask for the base pointer of a non-pointer type");
+
if (I->getType()->isVectorTy())
return findBaseDefiningValueOfVector(I);
-
- assert(I->getType()->isPointerTy() &&
- "Illegal to ask for the base pointer of a non-pointer type");
if (isa<Argument>(I))
// An incoming argument to the function is a base pointer
@@ -1310,18 +1297,29 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
assert(Index < LiveVec.size() && "Bug in std::find?");
return Index;
};
-
- // All gc_relocate are set to i8 addrspace(1)* type. We originally generated
- // unique declarations for each pointer type, but this proved problematic
- // because the intrinsic mangling code is incomplete and fragile. Since
- // we're moving towards a single unified pointer type anyways, we can just
- // cast everything to an i8* of the right address space. A bitcast is added
- // later to convert gc_relocate to the actual value's type.
Module *M = StatepointToken->getModule();
- auto AS = cast<PointerType>(LiveVariables[0]->getType())->getAddressSpace();
- Type *Types[] = {Type::getInt8PtrTy(M->getContext(), AS)};
- Value *GCRelocateDecl =
- Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types);
+
+ // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose
+ // element type is i8 addrspace(1)*). We originally generated unique
+ // declarations for each pointer type, but this proved problematic because
+ // the intrinsic mangling code is incomplete and fragile. Since we're moving
+ // towards a single unified pointer type anyways, we can just cast everything
+ // to an i8* of the right address space. A bitcast is added later to convert
+ // gc_relocate to the actual value's type.
+ auto getGCRelocateDecl = [&] (Type *Ty) {
+ assert(isHandledGCPointerType(Ty));
+ auto AS = Ty->getScalarType()->getPointerAddressSpace();
+ Type *NewTy = Type::getInt8PtrTy(M->getContext(), AS);
+ if (auto *VT = dyn_cast<VectorType>(Ty))
+ NewTy = VectorType::get(NewTy, VT->getNumElements());
+ return Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate,
+ {NewTy});
+ };
+
+ // Lazily populated map from input types to the canonicalized form mentioned
+ // in the comment above. This should probably be cached somewhere more
+ // broadly.
+ DenseMap<Type*, Value*> TypeToDeclMap;
for (unsigned i = 0; i < LiveVariables.size(); i++) {
// Generate the gc.relocate call and save the result
@@ -1329,6 +1327,11 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
Builder.getInt32(LiveStart + FindIndex(LiveVariables, BasePtrs[i]));
Value *LiveIdx = Builder.getInt32(LiveStart + i);
+ Type *Ty = LiveVariables[i]->getType();
+ if (!TypeToDeclMap.count(Ty))
+ TypeToDeclMap[Ty] = getGCRelocateDecl(Ty);
+ Value *GCRelocateDecl = TypeToDeclMap[Ty];
+
// only specify a debug name if we can give a useful one
CallInst *Reloc = Builder.CreateCall(
GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
@@ -2380,15 +2383,19 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// Do a limited scalarization of any live at safepoint vector values which
// contain pointers. This enables this pass to run after vectorization at
- // the cost of some possible performance loss. TODO: it would be nice to
- // natively support vectors all the way through the backend so we don't need
- // to scalarize here.
- for (size_t i = 0; i < Records.size(); i++) {
- PartiallyConstructedSafepointRecord &Info = Records[i];
- Instruction *Statepoint = ToUpdate[i].getInstruction();
- splitVectorValues(cast<Instruction>(Statepoint), Info.LiveSet,
- Info.PointerToBase, DT);
- }
+ // the cost of some possible performance loss. Note: This is known to not
+ // handle updating of the side tables correctly which can lead to relocation
+ // bugs when the same vector is live at multiple statepoints. We're in the
+ // process of implementing the alternate lowering - relocating the
+ // vector-of-pointers as first class item and updating the backend to
+ // understand that - but that's not yet complete.
+ if (UseVectorSplit)
+ for (size_t i = 0; i < Records.size(); i++) {
+ PartiallyConstructedSafepointRecord &Info = Records[i];
+ Instruction *Statepoint = ToUpdate[i].getInstruction();
+ splitVectorValues(cast<Instruction>(Statepoint), Info.LiveSet,
+ Info.PointerToBase, DT);
+ }
// In order to reduce live set of statepoint we might choose to rematerialize
// some values instead of relocating them. This is purely an optimization and
@@ -2467,7 +2474,8 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
#ifndef NDEBUG
// sanity check
for (auto *Ptr : Live)
- assert(isGCPointerType(Ptr->getType()) && "must be a gc pointer type");
+ assert(isHandledGCPointerType(Ptr->getType()) &&
+ "must be a gc pointer type");
#endif
relocationViaAlloca(F, DT, Live, Records);
@@ -2547,7 +2555,7 @@ void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) {
static bool shouldRewriteStatepointsIn(Function &F) {
// TODO: This should check the GCStrategy
if (F.hasGC()) {
- const char *FunctionGCName = F.getGC();
+ const auto &FunctionGCName = F.getGC();
const StringRef StatepointExampleName("statepoint-example");
const StringRef CoreCLRName("coreclr");
return (StatepointExampleName == FunctionGCName) ||
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
index 2fca803..8569e08 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -757,9 +757,14 @@ void SCCPSolver::visitCastInst(CastInst &I) {
LatticeVal OpSt = getValueState(I.getOperand(0));
if (OpSt.isOverdefined()) // Inherit overdefinedness of operand
markOverdefined(&I);
- else if (OpSt.isConstant()) // Propagate constant value
- markConstant(&I, ConstantExpr::getCast(I.getOpcode(),
- OpSt.getConstant(), I.getType()));
+ else if (OpSt.isConstant()) {
+ Constant *C =
+ ConstantExpr::getCast(I.getOpcode(), OpSt.getConstant(), I.getType());
+ if (isa<UndefValue>(C))
+ return;
+ // Propagate constant value
+ markConstant(&I, C);
+ }
}
@@ -859,10 +864,14 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
LatticeVal &IV = ValueState[&I];
if (IV.isOverdefined()) return;
- if (V1State.isConstant() && V2State.isConstant())
- return markConstant(IV, &I,
- ConstantExpr::get(I.getOpcode(), V1State.getConstant(),
- V2State.getConstant()));
+ if (V1State.isConstant() && V2State.isConstant()) {
+ Constant *C = ConstantExpr::get(I.getOpcode(), V1State.getConstant(),
+ V2State.getConstant());
+ // X op Y -> undef.
+ if (isa<UndefValue>(C))
+ return;
+ return markConstant(IV, &I, C);
+ }
// If something is undef, wait for it to resolve.
if (!V1State.isOverdefined() && !V2State.isOverdefined())
@@ -917,10 +926,13 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
LatticeVal &IV = ValueState[&I];
if (IV.isOverdefined()) return;
- if (V1State.isConstant() && V2State.isConstant())
- return markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(),
- V1State.getConstant(),
- V2State.getConstant()));
+ if (V1State.isConstant() && V2State.isConstant()) {
+ Constant *C = ConstantExpr::getCompare(
+ I.getPredicate(), V1State.getConstant(), V2State.getConstant());
+ if (isa<UndefValue>(C))
+ return;
+ return markConstant(IV, &I, C);
+ }
// If operands are still undefined, wait for it to resolve.
if (!V1State.isOverdefined() && !V2State.isOverdefined())
@@ -1020,8 +1032,11 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
Constant *Ptr = Operands[0];
auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end());
- markConstant(&I, ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr,
- Indices));
+ Constant *C =
+ ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, Indices);
+ if (isa<UndefValue>(C))
+ return;
+ markConstant(&I, C);
}
void SCCPSolver::visitStoreInst(StoreInst &SI) {
@@ -1061,9 +1076,9 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
Constant *Ptr = PtrVal.getConstant();
- // load null -> null
+ // load null is undefined.
if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0)
- return markConstant(IV, &I, UndefValue::get(I.getType()));
+ return;
// Transform load (constant global) into the value loaded.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
@@ -1079,8 +1094,11 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
}
// Transform load from a constant into a constant if possible.
- if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, DL))
+ if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, DL)) {
+ if (isa<UndefValue>(C))
+ return;
return markConstant(IV, &I, C);
+ }
// Otherwise we cannot say for certain what value this load will produce.
// Bail out.
@@ -1122,8 +1140,12 @@ CallOverdefined:
// If we can constant fold this, mark the result of the call as a
// constant.
- if (Constant *C = ConstantFoldCall(F, Operands, TLI))
+ if (Constant *C = ConstantFoldCall(F, Operands, TLI)) {
+ // call -> undef.
+ if (isa<UndefValue>(C))
+ return;
return markConstant(I, C);
+ }
}
// Otherwise, we don't know anything about this call, mark it overdefined.
@@ -1379,6 +1401,11 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// X % undef -> undef. No change.
if (Op1LV.isUndefined()) break;
+ // X / 0 -> undef. No change.
+ // X % 0 -> undef. No change.
+ if (Op1LV.isConstant() && Op1LV.getConstant()->isZeroValue())
+ break;
+
// undef / X -> 0. X could be maxint.
// undef % X -> 0. X could be 1.
markForcedConstant(&I, Constant::getNullValue(ITy));
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 0e0b00d..4e84d72 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -425,9 +425,7 @@ bool TailCallElim::runTRE(Function &F) {
// with themselves. Check to see if we did and clean up our mess if so. This
// occurs when a function passes an argument straight through to its tail
// call.
- for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
- PHINode *PN = ArgumentPHIs[i];
-
+ for (PHINode *PN : ArgumentPHIs) {
// If the PHI Node is a dynamic constant, replace it with the value it is.
if (Value *PNV = SimplifyInstruction(PN, F.getParent()->getDataLayout())) {
PN->replaceAllUsesWith(PNV);
@@ -468,10 +466,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// return value of the call, it must only use things that are defined before
// the call, or movable instructions between the call and the instruction
// itself.
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (I->getOperand(i) == CI)
- return false;
- return true;
+ return std::find(I->op_begin(), I->op_end(), CI) == I->op_end();
}
/// Return true if the specified value is the same when the return would exit
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index a5137e9..72db980 100644
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -626,11 +626,17 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
Clone2->setName(Twine("lpad") + Suffix2);
NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2);
- // Create a PHI node for the two cloned landingpad instructions.
- PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad);
- PN->addIncoming(Clone1, NewBB1);
- PN->addIncoming(Clone2, NewBB2);
- LPad->replaceAllUsesWith(PN);
+ // Create a PHI node for the two cloned landingpad instructions only
+ // if the original landingpad instruction has some uses.
+ if (!LPad->use_empty()) {
+ assert(!LPad->getType()->isTokenTy() &&
+ "Split cannot be applied if LPad is token type. Otherwise an "
+ "invalid PHINode of token type would be created.");
+ PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad);
+ PN->addIncoming(Clone1, NewBB1);
+ PN->addIncoming(Clone2, NewBB2);
+ LPad->replaceAllUsesWith(PN);
+ }
LPad->eraseFromParent();
} else {
// There is no second clone. Just replace the landing pad with the first
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 854a3b8..6454afb 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -266,27 +266,14 @@ namespace {
bool ModuleLevelChanges;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
- CloningDirector *Director;
- ValueMapTypeRemapper *TypeMapper;
- ValueMaterializer *Materializer;
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
ValueToValueMapTy &valueMap, bool moduleLevelChanges,
- const char *nameSuffix, ClonedCodeInfo *codeInfo,
- CloningDirector *Director)
+ const char *nameSuffix, ClonedCodeInfo *codeInfo)
: NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
- CodeInfo(codeInfo), Director(Director) {
- // These are optional components. The Director may return null.
- if (Director) {
- TypeMapper = Director->getTypeRemapper();
- Materializer = Director->getValueMaterializer();
- } else {
- TypeMapper = nullptr;
- Materializer = nullptr;
- }
- }
+ CodeInfo(codeInfo) {}
/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
@@ -332,23 +319,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// loop doesn't include the terminator.
for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end();
II != IE; ++II) {
- // If the "Director" remaps the instruction, don't clone it.
- if (Director) {
- CloningDirector::CloningAction Action =
- Director->handleInstruction(VMap, &*II, NewBB);
- // If the cloning director says stop, we want to stop everything, not
- // just break out of the loop (which would cause the terminator to be
- // cloned). The cloning director is responsible for inserting a proper
- // terminator into the new basic block in this case.
- if (Action == CloningDirector::StopCloningBB)
- return;
- // If the cloning director says skip, continue to the next instruction.
- // In this case, the cloning director is responsible for mapping the
- // skipped instruction to some value that is defined in the new
- // basic block.
- if (Action == CloningDirector::SkipInstruction)
- continue;
- }
Instruction *NewInst = II->clone();
@@ -356,8 +326,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// nodes for which we defer processing until we update the CFG.
if (!isa<PHINode>(NewInst)) {
RemapInstruction(NewInst, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
- TypeMapper, Materializer);
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
// If we can simplify this instruction to some other value, simply add
// a mapping to that value rather than inserting a new instruction into
@@ -397,26 +366,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// Finally, clone over the terminator.
const TerminatorInst *OldTI = BB->getTerminator();
bool TerminatorDone = false;
- if (Director) {
- CloningDirector::CloningAction Action
- = Director->handleInstruction(VMap, OldTI, NewBB);
- // If the cloning director says stop, we want to stop everything, not
- // just break out of the loop (which would cause the terminator to be
- // cloned). The cloning director is responsible for inserting a proper
- // terminator into the new basic block in this case.
- if (Action == CloningDirector::StopCloningBB)
- return;
- if (Action == CloningDirector::CloneSuccessors) {
- // If the director says to skip with a terminate instruction, we still
- // need to clone this block's successors.
- const TerminatorInst *TI = NewBB->getTerminator();
- for (const BasicBlock *Succ : TI->successors())
- ToClone.push_back(Succ);
- return;
- }
- assert(Action != CloningDirector::SkipInstruction &&
- "SkipInstruction is not valid for terminators.");
- }
if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
if (BI->isConditional()) {
// If the condition was a known constant in the callee...
@@ -485,19 +434,13 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst *> &Returns,
- const char *NameSuffix,
- ClonedCodeInfo *CodeInfo,
- CloningDirector *Director) {
+ const char *NameSuffix,
+ ClonedCodeInfo *CodeInfo) {
assert(NameSuffix && "NameSuffix cannot be null!");
ValueMapTypeRemapper *TypeMapper = nullptr;
ValueMaterializer *Materializer = nullptr;
- if (Director) {
- TypeMapper = Director->getTypeRemapper();
- Materializer = Director->getValueMaterializer();
- }
-
#ifndef NDEBUG
// If the cloning starts at the beginning of the function, verify that
// the function arguments are mapped.
@@ -507,7 +450,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
#endif
PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
- NameSuffix, CodeInfo, Director);
+ NameSuffix, CodeInfo);
const BasicBlock *StartingBB;
if (StartingInst)
StartingBB = StartingInst->getParent();
@@ -731,8 +674,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
ClonedCodeInfo *CodeInfo,
Instruction *TheCall) {
CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap,
- ModuleLevelChanges, Returns, NameSuffix, CodeInfo,
- nullptr);
+ ModuleLevelChanges, Returns, NameSuffix, CodeInfo);
}
/// \brief Remaps instructions in \p Blocks using the mapping in \p VMap.
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index 0e386ac..d2793e5 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -1051,9 +1052,31 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
- if (ExtendedArg)
- Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr,
+ if (ExtendedArg) {
+ // We're now only describing a subset of the variable. The piece we're
+ // describing will always be smaller than the variable size, because
+ // VariableSize == Size of Alloca described by DDI. Since SI stores
+ // to the alloca described by DDI, if it's first operand is an extend,
+ // we're guaranteed that before extension, the value was narrower than
+ // the size of the alloca, hence the size of the described variable.
+ SmallVector<uint64_t, 3> NewDIExpr;
+ unsigned PieceOffset = 0;
+ // If this already is a bit piece, we drop the bit piece from the expression
+ // and record the offset.
+ if (DIExpr->isBitPiece()) {
+ NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()-3);
+ PieceOffset = DIExpr->getBitPieceOffset();
+ } else {
+ NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
+ }
+ NewDIExpr.push_back(dwarf::DW_OP_bit_piece);
+ NewDIExpr.push_back(PieceOffset); //Offset
+ const DataLayout &DL = DDI->getModule()->getDataLayout();
+ NewDIExpr.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); // Size
+ Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar,
+ Builder.createExpression(NewDIExpr),
DDI->getDebugLoc(), SI);
+ }
else
Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr,
DDI->getDebugLoc(), SI);
@@ -1407,7 +1430,7 @@ void llvm::removeUnwindEdge(BasicBlock *BB) {
/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even
/// if they are in a dead cycle. Return true if a change was made, false
/// otherwise.
-bool llvm::removeUnreachableBlocks(Function &F) {
+bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) {
SmallPtrSet<BasicBlock*, 128> Reachable;
bool Changed = markAliveBlocks(F, Reachable);
@@ -1428,6 +1451,8 @@ bool llvm::removeUnreachableBlocks(Function &F) {
++SI)
if (Reachable.count(*SI))
(*SI)->removePredecessor(&*BB);
+ if (LVI)
+ LVI->eraseBlock(&*BB);
BB->dropAllReferences();
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 2499b88..eea9237 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -528,7 +528,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
Loop *OuterL = L->getParentLoop();
// Update LoopInfo if the loop is completely removed.
if (CompletelyUnroll)
- LI->updateUnloop(L);;
+ LI->markAsRemoved(L);
// If we have a pass and a DominatorTree we should re-simplify impacted loops
// to ensure subsequent analyses can rely on this form. We want to simplify
@@ -542,7 +542,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// LCSSA must be performed on the outermost affected loop. The unrolled
// loop's last loop latch is guaranteed to be in the outermost loop after
- // LoopInfo's been updated by updateUnloop.
+ // LoopInfo's been updated by markAsRemoved.
Loop *LatchLoop = LI->getLoopFor(Latches.back());
if (!OuterL->contains(LatchLoop))
while (OuterL->getParentLoop() != LatchLoop)
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
index e038805..fa958e9 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -599,7 +599,7 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
IRBuilder<>::FastMathFlagGuard FMFG(Builder);
FastMathFlags FMF;
FMF.setUnsafeAlgebra();
- Builder.SetFastMathFlags(FMF);
+ Builder.setFastMathFlags(FMF);
Value *Cmp;
if (RK == MRK_FloatMin || RK == MRK_FloatMax)
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 3bb3fa5..3125a2c 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -141,6 +141,8 @@ class SimplifyCFGOpt {
bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
+ bool SimplifySingleResume(ResumeInst *RI);
+ bool SimplifyCommonResume(ResumeInst *RI);
bool SimplifyCleanupReturn(CleanupReturnInst *RI);
bool SimplifyUnreachable(UnreachableInst *UI);
bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
@@ -3239,14 +3241,101 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
}
bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
- // If this is a trivial landing pad that just continues unwinding the caught
- // exception then zap the landing pad, turning its invokes into calls.
+ if (isa<PHINode>(RI->getValue()))
+ return SimplifyCommonResume(RI);
+ else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
+ RI->getValue() == RI->getParent()->getFirstNonPHI())
+ // The resume must unwind the exception that caused control to branch here.
+ return SimplifySingleResume(RI);
+
+ return false;
+}
+
+// Simplify resume that is shared by several landing pads (phi of landing pad).
+bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) {
+ BasicBlock *BB = RI->getParent();
+
+ // Check that there are no other instructions except for debug intrinsics
+ // between the phi of landing pads (RI->getValue()) and resume instruction.
+ BasicBlock::iterator I = cast<Instruction>(RI->getValue())->getIterator(),
+ E = RI->getIterator();
+ while (++I != E)
+ if (!isa<DbgInfoIntrinsic>(I))
+ return false;
+
+ SmallSet<BasicBlock *, 4> TrivialUnwindBlocks;
+ auto *PhiLPInst = cast<PHINode>(RI->getValue());
+
+ // Check incoming blocks to see if any of them are trivial.
+ for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues();
+ Idx != End; Idx++) {
+ auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
+ auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
+
+ // If the block has other successors, we can not delete it because
+ // it has other dependents.
+ if (IncomingBB->getUniqueSuccessor() != BB)
+ continue;
+
+ auto *LandingPad =
+ dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
+ // Not the landing pad that caused the control to branch here.
+ if (IncomingValue != LandingPad)
+ continue;
+
+ bool isTrivial = true;
+
+ I = IncomingBB->getFirstNonPHI()->getIterator();
+ E = IncomingBB->getTerminator()->getIterator();
+ while (++I != E)
+ if (!isa<DbgInfoIntrinsic>(I)) {
+ isTrivial = false;
+ break;
+ }
+
+ if (isTrivial)
+ TrivialUnwindBlocks.insert(IncomingBB);
+ }
+
+ // If no trivial unwind blocks, don't do any simplifications.
+ if (TrivialUnwindBlocks.empty()) return false;
+
+ // Turn all invokes that unwind here into calls.
+ for (auto *TrivialBB : TrivialUnwindBlocks) {
+ // Blocks that will be simplified should be removed from the phi node.
+ // Note there could be multiple edges to the resume block, and we need
+ // to remove them all.
+ while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
+ BB->removePredecessor(TrivialBB, true);
+
+ for (pred_iterator PI = pred_begin(TrivialBB), PE = pred_end(TrivialBB);
+ PI != PE;) {
+ BasicBlock *Pred = *PI++;
+ removeUnwindEdge(Pred);
+ }
+
+ // In each SimplifyCFG run, only the current processed block can be erased.
+ // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
+ // of erasing TrivialBB, we only remove the branch to the common resume
+ // block so that we can later erase the resume block since it has no
+ // predecessors.
+ TrivialBB->getTerminator()->eraseFromParent();
+ new UnreachableInst(RI->getContext(), TrivialBB);
+ }
+
+ // Delete the resume block if all its predecessors have been removed.
+ if (pred_empty(BB))
+ BB->eraseFromParent();
+
+ return !TrivialUnwindBlocks.empty();
+}
+
+// Simplify resume that is only used by a single (non-phi) landing pad.
+bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) {
BasicBlock *BB = RI->getParent();
LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI());
- if (RI->getValue() != LPInst)
- // Not a landing pad, or the resume is not unwinding the exception that
- // caused control to branch here.
- return false;
+ assert (RI->getValue() == LPInst &&
+ "Resume must unwind the exception that caused control to here");
// Check that there are no other instructions except for debug intrinsics.
BasicBlock::iterator I = LPInst->getIterator(), E = RI->getIterator();
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index dc5fee5..dc07440 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -997,7 +997,7 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
// Propagate fast-math flags from the existing call to the new call.
IRBuilder<>::FastMathFlagGuard Guard(B);
- B.SetFastMathFlags(CI->getFastMathFlags());
+ B.setFastMathFlags(CI->getFastMathFlags());
// floor((double)floatval) -> (double)floorf(floatval)
if (Callee->isIntrinsic()) {
@@ -1035,7 +1035,7 @@ Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
// Propagate fast-math flags from the existing call to the new call.
IRBuilder<>::FastMathFlagGuard Guard(B);
- B.SetFastMathFlags(CI->getFastMathFlags());
+ B.setFastMathFlags(CI->getFastMathFlags());
// fmin((double)floatval1, (double)floatval2)
// -> (double)fminf(floatval1, floatval2)
@@ -1127,29 +1127,26 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
Callee->getAttributes());
}
+ // FIXME: Use instruction-level FMF.
bool UnsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent());
- // pow(exp(x), y) -> exp(x*y)
+ // pow(exp(x), y) -> exp(x * y)
// pow(exp2(x), y) -> exp2(x * y)
- // We enable these only under fast-math. Besides rounding
- // differences the transformation changes overflow and
- // underflow behavior quite dramatically.
+ // We enable these only with fast-math. Besides rounding differences, the
+ // transformation changes overflow and underflow behavior quite dramatically.
// Example: x = 1000, y = 0.001.
// pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1).
- if (UnsafeFPMath) {
- if (auto *OpC = dyn_cast<CallInst>(Op1)) {
+ auto *OpC = dyn_cast<CallInst>(Op1);
+ if (OpC && OpC->hasUnsafeAlgebra() && CI->hasUnsafeAlgebra()) {
+ LibFunc::Func Func;
+ Function *OpCCallee = OpC->getCalledFunction();
+ if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) &&
+ TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2)) {
IRBuilder<>::FastMathFlagGuard Guard(B);
- FastMathFlags FMF;
- FMF.setUnsafeAlgebra();
- B.SetFastMathFlags(FMF);
-
- LibFunc::Func Func;
- Function *OpCCallee = OpC->getCalledFunction();
- if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) &&
- TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2))
- return EmitUnaryFloatFnCall(
- B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"),
- OpCCallee->getName(), B, OpCCallee->getAttributes());
+ B.setFastMathFlags(CI->getFastMathFlags());
+ Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul");
+ return EmitUnaryFloatFnCall(FMul, OpCCallee->getName(), B,
+ OpCCallee->getAttributes());
}
}
@@ -1167,9 +1164,12 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
LibFunc::fabsl)) {
// In -ffast-math, pow(x, 0.5) -> sqrt(x).
- if (UnsafeFPMath)
+ if (CI->hasUnsafeAlgebra()) {
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
return EmitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B,
Callee->getAttributes());
+ }
// Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
// This is faster than calling pow, and still handles negative zero
@@ -1328,7 +1328,7 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
FMF.setNoSignedZeros();
FMF.setNoNaNs();
}
- B.SetFastMathFlags(FMF);
+ B.setFastMathFlags(FMF);
// We have a relaxed floating-point environment. We can ignore NaN-handling
// and transform to a compare and select. We do not have to consider errno or
@@ -1354,11 +1354,13 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
!FT->getParamType(0)->isFloatingPointTy())
return Ret;
- if (!canUseUnsafeFPMath(CI->getParent()->getParent()))
+ if (!CI->hasUnsafeAlgebra())
return Ret;
Value *Op1 = CI->getArgOperand(0);
auto *OpC = dyn_cast<CallInst>(Op1);
- if (!OpC)
+
+ // The earlier call must also be unsafe in order to do these transforms.
+ if (!OpC || !OpC->hasUnsafeAlgebra())
return Ret;
// log(pow(x,y)) -> y*log(x)
@@ -1369,7 +1371,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
IRBuilder<>::FastMathFlagGuard Guard(B);
FastMathFlags FMF;
FMF.setUnsafeAlgebra();
- B.SetFastMathFlags(FMF);
+ B.setFastMathFlags(FMF);
LibFunc::Func Func;
Function *F = OpC->getCalledFunction();
@@ -1397,66 +1399,67 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" ||
Callee->getIntrinsicID() == Intrinsic::sqrt))
Ret = optimizeUnaryDoubleFP(CI, B, true);
- if (!canUseUnsafeFPMath(CI->getParent()->getParent()))
+
+ if (!CI->hasUnsafeAlgebra())
return Ret;
- Value *Op = CI->getArgOperand(0);
- if (Instruction *I = dyn_cast<Instruction>(Op)) {
- if (I->getOpcode() == Instruction::FMul && I->hasUnsafeAlgebra()) {
- // We're looking for a repeated factor in a multiplication tree,
- // so we can do this fold: sqrt(x * x) -> fabs(x);
- // or this fold: sqrt(x * x * y) -> fabs(x) * sqrt(y).
- Value *Op0 = I->getOperand(0);
- Value *Op1 = I->getOperand(1);
- Value *RepeatOp = nullptr;
- Value *OtherOp = nullptr;
- if (Op0 == Op1) {
- // Simple match: the operands of the multiply are identical.
- RepeatOp = Op0;
- } else {
- // Look for a more complicated pattern: one of the operands is itself
- // a multiply, so search for a common factor in that multiply.
- // Note: We don't bother looking any deeper than this first level or for
- // variations of this pattern because instcombine's visitFMUL and/or the
- // reassociation pass should give us this form.
- Value *OtherMul0, *OtherMul1;
- if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) {
- // Pattern: sqrt((x * y) * z)
- if (OtherMul0 == OtherMul1) {
- // Matched: sqrt((x * x) * z)
- RepeatOp = OtherMul0;
- OtherOp = Op1;
- }
- }
- }
- if (RepeatOp) {
- // Fast math flags for any created instructions should match the sqrt
- // and multiply.
- // FIXME: We're not checking the sqrt because it doesn't have
- // fast-math-flags (see earlier comment).
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.SetFastMathFlags(I->getFastMathFlags());
- // If we found a repeated factor, hoist it out of the square root and
- // replace it with the fabs of that factor.
- Module *M = Callee->getParent();
- Type *ArgType = Op->getType();
- Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
- Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs");
- if (OtherOp) {
- // If we found a non-repeated factor, we still need to get its square
- // root. We then multiply that by the value that was simplified out
- // of the square root calculation.
- Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType);
- Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt");
- return B.CreateFMul(FabsCall, SqrtCall);
- }
- return FabsCall;
+ Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0));
+ if (!I || I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
+ return Ret;
+
+ // We're looking for a repeated factor in a multiplication tree,
+ // so we can do this fold: sqrt(x * x) -> fabs(x);
+ // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y).
+ Value *Op0 = I->getOperand(0);
+ Value *Op1 = I->getOperand(1);
+ Value *RepeatOp = nullptr;
+ Value *OtherOp = nullptr;
+ if (Op0 == Op1) {
+ // Simple match: the operands of the multiply are identical.
+ RepeatOp = Op0;
+ } else {
+ // Look for a more complicated pattern: one of the operands is itself
+ // a multiply, so search for a common factor in that multiply.
+ // Note: We don't bother looking any deeper than this first level or for
+ // variations of this pattern because instcombine's visitFMUL and/or the
+ // reassociation pass should give us this form.
+ Value *OtherMul0, *OtherMul1;
+ if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) {
+ // Pattern: sqrt((x * y) * z)
+ if (OtherMul0 == OtherMul1 &&
+ cast<Instruction>(Op0)->hasUnsafeAlgebra()) {
+ // Matched: sqrt((x * x) * z)
+ RepeatOp = OtherMul0;
+ OtherOp = Op1;
}
}
}
- return Ret;
-}
+ if (!RepeatOp)
+ return Ret;
+ // Fast math flags for any created instructions should match the sqrt
+ // and multiply.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(I->getFastMathFlags());
+
+ // If we found a repeated factor, hoist it out of the square root and
+ // replace it with the fabs of that factor.
+ Module *M = Callee->getParent();
+ Type *ArgType = I->getType();
+ Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
+ Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs");
+ if (OtherOp) {
+ // If we found a non-repeated factor, we still need to get its square
+ // root. We then multiply that by the value that was simplified out
+ // of the square root calculation.
+ Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType);
+ Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt");
+ return B.CreateFMul(FabsCall, SqrtCall);
+ }
+ return FabsCall;
+}
+
+// TODO: Generalize to handle any trig function and its inverse.
Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
Value *Ret = nullptr;
@@ -1471,13 +1474,15 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) {
!FT->getParamType(0)->isFloatingPointTy())
return Ret;
- if (!canUseUnsafeFPMath(CI->getParent()->getParent()))
- return Ret;
Value *Op1 = CI->getArgOperand(0);
auto *OpC = dyn_cast<CallInst>(Op1);
if (!OpC)
return Ret;
+ // Both calls must allow unsafe optimizations in order to remove them.
+ if (!CI->hasUnsafeAlgebra() || !OpC->hasUnsafeAlgebra())
+ return Ret;
+
// tan(atan(x)) -> x
// tanf(atanf(x)) -> x
// tanl(atanl(x)) -> x
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
index 2e361d3..f47ddb9 100644
--- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -222,8 +222,17 @@ static void resolveCycles(Metadata *MD, bool AllowTemps) {
if (auto *N = dyn_cast_or_null<MDNode>(MD)) {
if (AllowTemps && N->isTemporary())
return;
- if (!N->isResolved())
- N->resolveCycles(AllowTemps);
+ if (!N->isResolved()) {
+ if (AllowTemps)
+ // Note that this will drop RAUW support on any temporaries, which
+ // blocks uniquing. If this ends up being an issue, in the future
+ // we can experiment with delaying resolving these nodes until
+ // after metadata is fully materialized (i.e. when linking metadata
+ // as a postpass after function importing).
+ N->resolveNonTemporaries();
+ else
+ N->resolveCycles();
+ }
}
}
diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9ed44d1..27d3337 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -222,7 +222,7 @@ static void propagateIRFlags(Value *I, ArrayRef<Value *> VL) {
}
}
}
-
+
/// \returns \p I after propagating metadata from \p VL.
static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) {
Instruction *I0 = cast<Instruction>(VL[0]);
@@ -506,7 +506,7 @@ private:
}
return Last;
}
-
+
/// -- Vectorization State --
/// Holds all of the tree entries.
std::vector<TreeEntry> VectorizableTree;
@@ -884,7 +884,7 @@ private:
/// The current size of the scheduling region.
int ScheduleRegionSize;
-
+
/// The maximum size allowed for the scheduling region.
int ScheduleRegionSizeLimit;
@@ -1089,7 +1089,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
newTreeEntry(VL, false);
return;
}
-
+
// Check that every instructions appears once in this bundle.
for (unsigned i = 0, e = VL.size(); i < e; ++i)
for (unsigned j = i+1; j < e; ++j)
@@ -1711,7 +1711,7 @@ int BoUpSLP::getSpillCost() {
int Cost = 0;
SmallPtrSet<Instruction*, 4> LiveValues;
- Instruction *PrevInst = nullptr;
+ Instruction *PrevInst = nullptr;
for (unsigned N = 0; N < VectorizableTree.size(); ++N) {
Instruction *Inst = dyn_cast<Instruction>(VectorizableTree[N].Scalars[0]);
@@ -1736,7 +1736,7 @@ int BoUpSLP::getSpillCost() {
for (auto &J : PrevInst->operands()) {
if (isa<Instruction>(&*J) && ScalarToTreeEntry.count(&*J))
LiveValues.insert(cast<Instruction>(&*J));
- }
+ }
// Now find the sequence of instructions between PrevInst and Inst.
BasicBlock::reverse_iterator InstIt(Inst->getIterator()),
@@ -1780,30 +1780,29 @@ int BoUpSLP::getTreeCost() {
unsigned BundleWidth = VectorizableTree[0].Scalars.size();
- for (unsigned i = 0, e = VectorizableTree.size(); i != e; ++i) {
- int C = getEntryCost(&VectorizableTree[i]);
+ for (TreeEntry &TE : VectorizableTree) {
+ int C = getEntryCost(&TE);
DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle that starts with "
- << *VectorizableTree[i].Scalars[0] << " .\n");
+ << TE.Scalars[0] << " .\n");
Cost += C;
}
SmallSet<Value *, 16> ExtractCostCalculated;
int ExtractCost = 0;
- for (UserList::iterator I = ExternalUses.begin(), E = ExternalUses.end();
- I != E; ++I) {
+ for (ExternalUser &EU : ExternalUses) {
// We only add extract cost once for the same scalar.
- if (!ExtractCostCalculated.insert(I->Scalar).second)
+ if (!ExtractCostCalculated.insert(EU.Scalar).second)
continue;
// Uses by ephemeral values are free (because the ephemeral value will be
// removed prior to code generation, and so the extraction will be
// removed as well).
- if (EphValues.count(I->User))
+ if (EphValues.count(EU.User))
continue;
- VectorType *VecTy = VectorType::get(I->Scalar->getType(), BundleWidth);
+ VectorType *VecTy = VectorType::get(EU.Scalar->getType(), BundleWidth);
ExtractCost += TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy,
- I->Lane);
+ EU.Lane);
}
Cost += getSpillCost();
@@ -2551,7 +2550,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Value *BoUpSLP::vectorizeTree() {
-
+
// All blocks must be scheduled before any instructions are inserted.
for (auto &BSIter : BlocksSchedules) {
scheduleBlock(BSIter.second.get());
@@ -3072,10 +3071,10 @@ void BoUpSLP::BlockScheduling::resetSchedule() {
}
void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
-
+
if (!BS->ScheduleStart)
return;
-
+
DEBUG(dbgs() << "SLP: schedule block " << BS->BB->getName() << "\n");
BS->resetSchedule();
@@ -3590,7 +3589,7 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
/// \param NumEltsToRdx The number of elements that should be reduced in the
/// vector.
/// \param IsPairwise Whether the reduction is a pairwise or splitting
-/// reduction. A pairwise reduction will generate a mask of
+/// reduction. A pairwise reduction will generate a mask of
/// <0,2,...> or <1,3,..> while a splitting reduction will generate
/// <2,3, undef,undef> for a vector of 4 and NumElts = 2.
/// \param IsLeft True will generate a mask of even elements, odd otherwise.
@@ -3773,7 +3772,7 @@ public:
IRBuilder<> Builder(ReductionRoot);
FastMathFlags Unsafe;
Unsafe.setUnsafeAlgebra();
- Builder.SetFastMathFlags(Unsafe);
+ Builder.setFastMathFlags(Unsafe);
unsigned i = 0;
for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
@@ -4018,9 +4017,8 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Collect the incoming values from the PHIs.
Incoming.clear();
- for (BasicBlock::iterator instr = BB->begin(), ie = BB->end(); instr != ie;
- ++instr) {
- PHINode *P = dyn_cast<PHINode>(instr);
+ for (Instruction &I : *BB) {
+ PHINode *P = dyn_cast<PHINode>(&I);
if (!P)
break;
OpenPOWER on IntegriCloud